xref: /linux/drivers/gpu/drm/xe/xe_lrc.c (revision 2c1ed907520c50326b8f604907a8478b27881a2e)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_lrc.h"
7 
8 #include <generated/xe_wa_oob.h>
9 
10 #include <linux/ascii85.h>
11 
12 #include "instructions/xe_mi_commands.h"
13 #include "instructions/xe_gfxpipe_commands.h"
14 #include "instructions/xe_gfx_state_commands.h"
15 #include "regs/xe_engine_regs.h"
16 #include "regs/xe_lrc_layout.h"
17 #include "xe_bb.h"
18 #include "xe_bo.h"
19 #include "xe_device.h"
20 #include "xe_drm_client.h"
21 #include "xe_exec_queue_types.h"
22 #include "xe_gt.h"
23 #include "xe_gt_printk.h"
24 #include "xe_hw_fence.h"
25 #include "xe_map.h"
26 #include "xe_memirq.h"
27 #include "xe_sriov.h"
28 #include "xe_trace_lrc.h"
29 #include "xe_vm.h"
30 #include "xe_wa.h"
31 
32 #define LRC_VALID				BIT_ULL(0)
33 #define LRC_PRIVILEGE				BIT_ULL(8)
34 #define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
35 #define LRC_LEGACY_64B_CONTEXT			3
36 
37 #define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
38 #define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)
39 
40 #define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K
41 
42 static struct xe_device *
43 lrc_to_xe(struct xe_lrc *lrc)
44 {
45 	return gt_to_xe(lrc->fence_ctx.gt);
46 }
47 
48 size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
49 {
50 	struct xe_device *xe = gt_to_xe(gt);
51 	size_t size;
52 
53 	switch (class) {
54 	case XE_ENGINE_CLASS_RENDER:
55 		if (GRAPHICS_VER(xe) >= 20)
56 			size = 4 * SZ_4K;
57 		else
58 			size = 14 * SZ_4K;
59 		break;
60 	case XE_ENGINE_CLASS_COMPUTE:
61 		/* 14 pages since graphics_ver == 11 */
62 		if (GRAPHICS_VER(xe) >= 20)
63 			size = 3 * SZ_4K;
64 		else
65 			size = 14 * SZ_4K;
66 		break;
67 	default:
68 		WARN(1, "Unknown engine class: %d", class);
69 		fallthrough;
70 	case XE_ENGINE_CLASS_COPY:
71 	case XE_ENGINE_CLASS_VIDEO_DECODE:
72 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
73 	case XE_ENGINE_CLASS_OTHER:
74 		size = 2 * SZ_4K;
75 	}
76 
77 	/* Add indirect ring state page */
78 	if (xe_gt_has_indirect_ring_state(gt))
79 		size += LRC_INDIRECT_RING_STATE_SIZE;
80 
81 	return size;
82 }
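/*
 * For example, per the switch above: a render-class LRC on an Xe2 GT with
 * indirect ring state enabled is 4 * SZ_4K + LRC_INDIRECT_RING_STATE_SIZE,
 * i.e. 20K.
 */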
83 
84 /*
85  * The per-platform tables are u8-encoded in @data. Decode @data and write the
86  * register offsets and commands into @regs. The following encoding is used
87  * for each byte. There are 2 steps: decoding commands and decoding addresses.
88  *
89  * Commands:
90  * [7]: create NOPs - the number of NOPs is set in the lower bits
91  * [6]: When creating a MI_LOAD_REGISTER_IMM command, allows setting
92  *      MI_LRI_FORCE_POSTED
93  * [5:0]: Number of NOPs or registers to set values to in case of
94  *        MI_LOAD_REGISTER_IMM
95  *
96  * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command, "count"
97  * registers at a time. They are set by using the REG/REG16 macros: the former
98  * is used for offsets smaller than 0x200 while the latter is for offsets of
99  * 0x200 and above. Those macros already set all the bits documented below correctly:
100  *
101  * [7]: When a register offset needs more than 6 bits, additional bytes follow,
102  *      carrying the lower bits
103  * [6:0]: Register offset, without considering the engine base.
104  *
105  * This function only tweaks the commands and register offsets. Values are not
106  * filled out.
107  */
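/*
 * A minimal worked example of the encoding above (illustrative only; the
 * "example" table below is hypothetical, not one of the per-platform tables):
 *
 *   static const u8 example[] = { NOP(1), LRI(2, POSTED), REG(0x034), REG16(0x2b4), 0 };
 *
 * expands to the bytes { 0x81, 0x42, 0x0d, 0x81, 0x2d, 0x00 }, and
 * set_offsets() below decodes it as:
 *
 *   regs[0]                                  skipped by NOP(1)
 *   regs[1] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
 *             MI_LRI_FORCE_POSTED | MI_LRI_LRM_CS_MMIO
 *   regs[2] = base + 0x034                   (value dword regs[3] untouched)
 *   regs[4] = base + 0x2b4                   (value dword regs[5] untouched)
 *   regs[6] = MI_BATCH_BUFFER_END | BIT(0)   written at the terminating 0
 */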
108 static void set_offsets(u32 *regs,
109 			const u8 *data,
110 			const struct xe_hw_engine *hwe)
111 #define NOP(x) (BIT(7) | (x))
112 #define LRI(count, flags) ((flags) << 6 | (count) | \
113 			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
114 #define POSTED BIT(0)
115 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
116 #define REG16(x) \
117 	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
118 	(((x) >> 2) & 0x7f)
119 {
120 	const u32 base = hwe->mmio_base;
121 
122 	while (*data) {
123 		u8 count, flags;
124 
125 		if (*data & BIT(7)) { /* skip */
126 			count = *data++ & ~BIT(7);
127 			regs += count;
128 			continue;
129 		}
130 
131 		count = *data & 0x3f;
132 		flags = *data >> 6;
133 		data++;
134 
135 		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
136 		if (flags & POSTED)
137 			*regs |= MI_LRI_FORCE_POSTED;
138 		*regs |= MI_LRI_LRM_CS_MMIO;
139 		regs++;
140 
141 		xe_gt_assert(hwe->gt, count);
142 		do {
143 			u32 offset = 0;
144 			u8 v;
145 
146 			do {
147 				v = *data++;
148 				offset <<= 7;
149 				offset |= v & ~BIT(7);
150 			} while (v & BIT(7));
151 
152 			regs[0] = base + (offset << 2);
153 			regs += 2;
154 		} while (--count);
155 	}
156 
157 	*regs = MI_BATCH_BUFFER_END | BIT(0);
158 }
159 
160 static const u8 gen12_xcs_offsets[] = {
161 	NOP(1),
162 	LRI(13, POSTED),
163 	REG16(0x244),
164 	REG(0x034),
165 	REG(0x030),
166 	REG(0x038),
167 	REG(0x03c),
168 	REG(0x168),
169 	REG(0x140),
170 	REG(0x110),
171 	REG(0x1c0),
172 	REG(0x1c4),
173 	REG(0x1c8),
174 	REG(0x180),
175 	REG16(0x2b4),
176 
177 	NOP(5),
178 	LRI(9, POSTED),
179 	REG16(0x3a8),
180 	REG16(0x28c),
181 	REG16(0x288),
182 	REG16(0x284),
183 	REG16(0x280),
184 	REG16(0x27c),
185 	REG16(0x278),
186 	REG16(0x274),
187 	REG16(0x270),
188 
189 	0
190 };
191 
192 static const u8 dg2_xcs_offsets[] = {
193 	NOP(1),
194 	LRI(15, POSTED),
195 	REG16(0x244),
196 	REG(0x034),
197 	REG(0x030),
198 	REG(0x038),
199 	REG(0x03c),
200 	REG(0x168),
201 	REG(0x140),
202 	REG(0x110),
203 	REG(0x1c0),
204 	REG(0x1c4),
205 	REG(0x1c8),
206 	REG(0x180),
207 	REG16(0x2b4),
208 	REG(0x120),
209 	REG(0x124),
210 
211 	NOP(1),
212 	LRI(9, POSTED),
213 	REG16(0x3a8),
214 	REG16(0x28c),
215 	REG16(0x288),
216 	REG16(0x284),
217 	REG16(0x280),
218 	REG16(0x27c),
219 	REG16(0x278),
220 	REG16(0x274),
221 	REG16(0x270),
222 
223 	0
224 };
225 
226 static const u8 gen12_rcs_offsets[] = {
227 	NOP(1),
228 	LRI(13, POSTED),
229 	REG16(0x244),
230 	REG(0x034),
231 	REG(0x030),
232 	REG(0x038),
233 	REG(0x03c),
234 	REG(0x168),
235 	REG(0x140),
236 	REG(0x110),
237 	REG(0x1c0),
238 	REG(0x1c4),
239 	REG(0x1c8),
240 	REG(0x180),
241 	REG16(0x2b4),
242 
243 	NOP(5),
244 	LRI(9, POSTED),
245 	REG16(0x3a8),
246 	REG16(0x28c),
247 	REG16(0x288),
248 	REG16(0x284),
249 	REG16(0x280),
250 	REG16(0x27c),
251 	REG16(0x278),
252 	REG16(0x274),
253 	REG16(0x270),
254 
255 	LRI(3, POSTED),
256 	REG(0x1b0),
257 	REG16(0x5a8),
258 	REG16(0x5ac),
259 
260 	NOP(6),
261 	LRI(1, 0),
262 	REG(0x0c8),
263 	NOP(3 + 9 + 1),
264 
265 	LRI(51, POSTED),
266 	REG16(0x588),
267 	REG16(0x588),
268 	REG16(0x588),
269 	REG16(0x588),
270 	REG16(0x588),
271 	REG16(0x588),
272 	REG(0x028),
273 	REG(0x09c),
274 	REG(0x0c0),
275 	REG(0x178),
276 	REG(0x17c),
277 	REG16(0x358),
278 	REG(0x170),
279 	REG(0x150),
280 	REG(0x154),
281 	REG(0x158),
282 	REG16(0x41c),
283 	REG16(0x600),
284 	REG16(0x604),
285 	REG16(0x608),
286 	REG16(0x60c),
287 	REG16(0x610),
288 	REG16(0x614),
289 	REG16(0x618),
290 	REG16(0x61c),
291 	REG16(0x620),
292 	REG16(0x624),
293 	REG16(0x628),
294 	REG16(0x62c),
295 	REG16(0x630),
296 	REG16(0x634),
297 	REG16(0x638),
298 	REG16(0x63c),
299 	REG16(0x640),
300 	REG16(0x644),
301 	REG16(0x648),
302 	REG16(0x64c),
303 	REG16(0x650),
304 	REG16(0x654),
305 	REG16(0x658),
306 	REG16(0x65c),
307 	REG16(0x660),
308 	REG16(0x664),
309 	REG16(0x668),
310 	REG16(0x66c),
311 	REG16(0x670),
312 	REG16(0x674),
313 	REG16(0x678),
314 	REG16(0x67c),
315 	REG(0x068),
316 	REG(0x084),
317 	NOP(1),
318 
319 	0
320 };
321 
322 static const u8 xehp_rcs_offsets[] = {
323 	NOP(1),
324 	LRI(13, POSTED),
325 	REG16(0x244),
326 	REG(0x034),
327 	REG(0x030),
328 	REG(0x038),
329 	REG(0x03c),
330 	REG(0x168),
331 	REG(0x140),
332 	REG(0x110),
333 	REG(0x1c0),
334 	REG(0x1c4),
335 	REG(0x1c8),
336 	REG(0x180),
337 	REG16(0x2b4),
338 
339 	NOP(5),
340 	LRI(9, POSTED),
341 	REG16(0x3a8),
342 	REG16(0x28c),
343 	REG16(0x288),
344 	REG16(0x284),
345 	REG16(0x280),
346 	REG16(0x27c),
347 	REG16(0x278),
348 	REG16(0x274),
349 	REG16(0x270),
350 
351 	LRI(3, POSTED),
352 	REG(0x1b0),
353 	REG16(0x5a8),
354 	REG16(0x5ac),
355 
356 	NOP(6),
357 	LRI(1, 0),
358 	REG(0x0c8),
359 
360 	0
361 };
362 
363 static const u8 dg2_rcs_offsets[] = {
364 	NOP(1),
365 	LRI(15, POSTED),
366 	REG16(0x244),
367 	REG(0x034),
368 	REG(0x030),
369 	REG(0x038),
370 	REG(0x03c),
371 	REG(0x168),
372 	REG(0x140),
373 	REG(0x110),
374 	REG(0x1c0),
375 	REG(0x1c4),
376 	REG(0x1c8),
377 	REG(0x180),
378 	REG16(0x2b4),
379 	REG(0x120),
380 	REG(0x124),
381 
382 	NOP(1),
383 	LRI(9, POSTED),
384 	REG16(0x3a8),
385 	REG16(0x28c),
386 	REG16(0x288),
387 	REG16(0x284),
388 	REG16(0x280),
389 	REG16(0x27c),
390 	REG16(0x278),
391 	REG16(0x274),
392 	REG16(0x270),
393 
394 	LRI(3, POSTED),
395 	REG(0x1b0),
396 	REG16(0x5a8),
397 	REG16(0x5ac),
398 
399 	NOP(6),
400 	LRI(1, 0),
401 	REG(0x0c8),
402 
403 	0
404 };
405 
406 static const u8 mtl_rcs_offsets[] = {
407 	NOP(1),
408 	LRI(15, POSTED),
409 	REG16(0x244),
410 	REG(0x034),
411 	REG(0x030),
412 	REG(0x038),
413 	REG(0x03c),
414 	REG(0x168),
415 	REG(0x140),
416 	REG(0x110),
417 	REG(0x1c0),
418 	REG(0x1c4),
419 	REG(0x1c8),
420 	REG(0x180),
421 	REG16(0x2b4),
422 	REG(0x120),
423 	REG(0x124),
424 
425 	NOP(1),
426 	LRI(9, POSTED),
427 	REG16(0x3a8),
428 	REG16(0x28c),
429 	REG16(0x288),
430 	REG16(0x284),
431 	REG16(0x280),
432 	REG16(0x27c),
433 	REG16(0x278),
434 	REG16(0x274),
435 	REG16(0x270),
436 
437 	NOP(2),
438 	LRI(2, POSTED),
439 	REG16(0x5a8),
440 	REG16(0x5ac),
441 
442 	NOP(6),
443 	LRI(1, 0),
444 	REG(0x0c8),
445 
446 	0
447 };
448 
449 #define XE2_CTX_COMMON \
450 	NOP(1),                 /* [0x00] */ \
451 	LRI(15, POSTED),        /* [0x01] */ \
452 	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
453 	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
454 	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
455 	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
456 	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
457 	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
458 	REG(0x140),             /* [0x0e] BB_ADDR */ \
459 	REG(0x110),             /* [0x10] BB_STATE */ \
460 	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
461 	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
462 	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
463 	REG(0x180),             /* [0x18] CCID */ \
464 	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
465 	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
466 	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
467 	\
468 	NOP(1),                 /* [0x20] */ \
469 	LRI(9, POSTED),         /* [0x21] */ \
470 	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
471 	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
472 	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
473 	REG16(0x284),           /* [0x28] dummy reg */ \
474 	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
475 	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
476 	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
477 	REG16(0x274),           /* [0x30] PTBP_UDW */ \
478 	REG16(0x270)            /* [0x32] PTBP_LDW */
479 
480 static const u8 xe2_rcs_offsets[] = {
481 	XE2_CTX_COMMON,
482 
483 	NOP(2),                 /* [0x34] */
484 	LRI(2, POSTED),         /* [0x36] */
485 	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
486 	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */
487 
488 	NOP(6),                 /* [0x3b] */
489 	LRI(1, 0),              /* [0x41] */
490 	REG(0x0c8),             /* [0x42] R_PWR_CLK_STATE */
491 
492 	0
493 };
494 
495 static const u8 xe2_bcs_offsets[] = {
496 	XE2_CTX_COMMON,
497 
498 	NOP(4 + 8 + 1),         /* [0x34] */
499 	LRI(2, POSTED),         /* [0x41] */
500 	REG16(0x200),           /* [0x42] BCS_SWCTRL */
501 	REG16(0x204),           /* [0x44] BLIT_CCTL */
502 
503 	0
504 };
505 
506 static const u8 xe2_xcs_offsets[] = {
507 	XE2_CTX_COMMON,
508 
509 	0
510 };
511 
512 static const u8 xe2_indirect_ring_state_offsets[] = {
513 	NOP(1),                 /* [0x00] */
514 	LRI(5, POSTED),         /* [0x01] */
515 	REG(0x034),             /* [0x02] RING_BUFFER_HEAD */
516 	REG(0x030),             /* [0x04] RING_BUFFER_TAIL */
517 	REG(0x038),             /* [0x06] RING_BUFFER_START */
518 	REG(0x048),             /* [0x08] RING_BUFFER_START_UDW */
519 	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */
520 
521 	NOP(5),                 /* [0x0c] */
522 	LRI(9, POSTED),         /* [0x11] */
523 	REG(0x168),             /* [0x12] BB_ADDR_UDW */
524 	REG(0x140),             /* [0x14] BB_ADDR */
525 	REG(0x110),             /* [0x16] BB_STATE */
526 	REG16(0x588),           /* [0x18] BB_STACK_WRITE_PORT */
527 	REG16(0x588),           /* [0x1a] BB_STACK_WRITE_PORT */
528 	REG16(0x588),           /* [0x1c] BB_STACK_WRITE_PORT */
529 	REG16(0x588),           /* [0x1e] BB_STACK_WRITE_PORT */
530 	REG16(0x588),           /* [0x20] BB_STACK_WRITE_PORT */
531 	REG16(0x588),           /* [0x22] BB_STACK_WRITE_PORT */
532 
533 	NOP(12),                /* [0x24] */
534 
535 	0
536 };
537 
538 #undef REG16
539 #undef REG
540 #undef LRI
541 #undef NOP
542 
543 static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
544 {
545 	if (class == XE_ENGINE_CLASS_RENDER) {
546 		if (GRAPHICS_VER(xe) >= 20)
547 			return xe2_rcs_offsets;
548 		else if (GRAPHICS_VERx100(xe) >= 1270)
549 			return mtl_rcs_offsets;
550 		else if (GRAPHICS_VERx100(xe) >= 1255)
551 			return dg2_rcs_offsets;
552 		else if (GRAPHICS_VERx100(xe) >= 1250)
553 			return xehp_rcs_offsets;
554 		else
555 			return gen12_rcs_offsets;
556 	} else if (class == XE_ENGINE_CLASS_COPY) {
557 		if (GRAPHICS_VER(xe) >= 20)
558 			return xe2_bcs_offsets;
559 		else
560 			return gen12_xcs_offsets;
561 	} else {
562 		if (GRAPHICS_VER(xe) >= 20)
563 			return xe2_xcs_offsets;
564 		else if (GRAPHICS_VERx100(xe) >= 1255)
565 			return dg2_xcs_offsets;
566 		else
567 			return gen12_xcs_offsets;
568 	}
569 }
570 
571 static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
572 {
573 	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
574 						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
575 
576 	if (xe_gt_has_indirect_ring_state(hwe->gt))
577 		regs[CTX_CONTEXT_CONTROL] |=
578 			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
579 
580 	/* TODO: Timestamp */
581 }
582 
583 static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
584 {
585 	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
586 	struct xe_device *xe = gt_to_xe(hwe->gt);
587 	u8 num_regs;
588 
589 	if (!xe_device_uses_memirq(xe))
590 		return;
591 
592 	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
593 					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
594 	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
595 	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
596 
597 	num_regs = xe_device_has_msix(xe) ? 3 : 2;
598 	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
599 				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
600 	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
601 	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
602 	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
603 	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
604 
605 	if (xe_device_has_msix(xe)) {
606 		regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
607 		/* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
608 	}
609 }
610 
611 static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
612 {
613 	struct xe_device *xe = gt_to_xe(hwe->gt);
614 
615 	if (GRAPHICS_VERx100(xe) >= 1250)
616 		return 0x70;
617 	else
618 		return 0x60;
619 }
620 
621 static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
622 {
623 	int x;
624 
625 	x = lrc_ring_mi_mode(hwe);
626 	regs[x + 1] &= ~STOP_RING;
627 	regs[x + 1] |= STOP_RING << 16;
628 }
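/*
 * The above relies on the masked-register convention used by MI_MODE-style
 * registers, where bits [31:16] select which of bits [15:0] the hardware will
 * actually update. The two statements are equivalent to the single masked
 * write below; setting only the mask bit clears STOP_RING on restore:
 *
 *   regs[x + 1] = (regs[x + 1] & ~STOP_RING) | (STOP_RING << 16);
 */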
629 
630 static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
631 {
632 	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
633 }
634 
635 static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
636 {
637 	return 0;
638 }
639 
640 u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
641 {
642 	return lrc->ring.size;
643 }
644 
645 /* Make the magic macros work */
646 #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
647 #define __xe_lrc_regs_offset xe_lrc_regs_offset
648 
649 #define LRC_SEQNO_PPHWSP_OFFSET 512
650 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
651 #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
652 #define LRC_PARALLEL_PPHWSP_OFFSET 2048
653 #define LRC_PPHWSP_SIZE SZ_4K
654 
655 u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
656 {
657 	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
658 }
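/*
 * Taken together, the offset helpers around here describe the following LRC
 * BO layout (the trailing indirect ring state page exists only when the GT
 * supports it, see __xe_lrc_indirect_ring_offset() below):
 *
 *   0 ............... ring.size        submission ring
 *   + LRC_PPHWSP_SIZE                  PPHWSP: seqno at +512, start seqno
 *                                      at +520, job timestamp at +528,
 *                                      parallel scratch at +2048
 *   + lrc_reg_size() ...               context register state
 *   lrc->size - SZ_4K ... lrc->size    indirect ring state page
 */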
659 
660 static size_t lrc_reg_size(struct xe_device *xe)
661 {
662 	if (GRAPHICS_VERx100(xe) >= 1250)
663 		return 96 * sizeof(u32);
664 	else
665 		return 80 * sizeof(u32);
666 }
667 
668 size_t xe_lrc_skip_size(struct xe_device *xe)
669 {
670 	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
671 }
672 
673 static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
674 {
675 	/* The seqno is stored in the driver-defined portion of PPHWSP */
676 	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
677 }
678 
679 static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
680 {
681 	/* The start seqno is stored in the driver-defined portion of PPHWSP */
682 	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
683 }
684 
685 static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
686 {
687 	/* The job timestamp is stored in the driver-defined portion of PPHWSP */
688 	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
689 }
690 
691 static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
692 {
693 	/* The parallel scratch area is stored in the driver-defined portion of PPHWSP */
694 	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
695 }
696 
697 static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
698 {
699 	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
700 }
701 
702 static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
703 {
704 	/* Indirect ring state page is at the very end of LRC */
705 	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
706 }
707 
708 #define DECL_MAP_ADDR_HELPERS(elem) \
709 static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
710 { \
711 	struct iosys_map map = lrc->bo->vmap; \
712 \
713 	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
714 	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
715 	return map; \
716 } \
717 static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
718 { \
719 	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
720 } \
721 
722 DECL_MAP_ADDR_HELPERS(ring)
723 DECL_MAP_ADDR_HELPERS(pphwsp)
724 DECL_MAP_ADDR_HELPERS(seqno)
725 DECL_MAP_ADDR_HELPERS(regs)
726 DECL_MAP_ADDR_HELPERS(start_seqno)
727 DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
728 DECL_MAP_ADDR_HELPERS(ctx_timestamp)
729 DECL_MAP_ADDR_HELPERS(parallel)
730 DECL_MAP_ADDR_HELPERS(indirect_ring)
731 
732 #undef DECL_MAP_ADDR_HELPERS
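/*
 * For reference, the pair of helpers the macro above generates for "seqno"
 * expands roughly to:
 *
 *   static inline struct iosys_map __xe_lrc_seqno_map(struct xe_lrc *lrc)
 *   {
 *   	struct iosys_map map = lrc->bo->vmap;
 *
 *   	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));
 *   	iosys_map_incr(&map, __xe_lrc_seqno_offset(lrc));
 *   	return map;
 *   }
 *
 *   static inline u32 __xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
 *   {
 *   	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_seqno_offset(lrc);
 *   }
 */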
733 
734 /**
735  * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
736  * @lrc: Pointer to the lrc.
737  *
738  * Returns: ctx timestamp GGTT address
739  */
740 u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
741 {
742 	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
743 }
744 
745 /**
746  * xe_lrc_ctx_timestamp() - Read ctx timestamp value
747  * @lrc: Pointer to the lrc.
748  *
749  * Returns: ctx timestamp value
750  */
751 u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
752 {
753 	struct xe_device *xe = lrc_to_xe(lrc);
754 	struct iosys_map map;
755 
756 	map = __xe_lrc_ctx_timestamp_map(lrc);
757 	return xe_map_read32(xe, &map);
758 }
759 
760 /**
761  * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
762  * @lrc: Pointer to the lrc.
763  *
764  * Returns: ctx job timestamp GGTT address
765  */
766 u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
767 {
768 	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
769 }
770 
771 /**
772  * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
773  * @lrc: Pointer to the lrc.
774  *
775  * Returns: ctx job timestamp value
776  */
777 u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
778 {
779 	struct xe_device *xe = lrc_to_xe(lrc);
780 	struct iosys_map map;
781 
782 	map = __xe_lrc_ctx_job_timestamp_map(lrc);
783 	return xe_map_read32(xe, &map);
784 }
785 
786 u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
787 {
788 	return __xe_lrc_pphwsp_ggtt_addr(lrc);
789 }
790 
791 u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
792 {
793 	if (!xe_lrc_has_indirect_ring_state(lrc))
794 		return 0;
795 
796 	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
797 }
798 
799 static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
800 {
801 	struct xe_device *xe = lrc_to_xe(lrc);
802 	struct iosys_map map;
803 
804 	map = __xe_lrc_indirect_ring_map(lrc);
805 	iosys_map_incr(&map, reg_nr * sizeof(u32));
806 	return xe_map_read32(xe, &map);
807 }
808 
809 static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
810 					  int reg_nr, u32 val)
811 {
812 	struct xe_device *xe = lrc_to_xe(lrc);
813 	struct iosys_map map;
814 
815 	map = __xe_lrc_indirect_ring_map(lrc);
816 	iosys_map_incr(&map, reg_nr * sizeof(u32));
817 	xe_map_write32(xe, &map, val);
818 }
819 
820 u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
821 {
822 	struct xe_device *xe = lrc_to_xe(lrc);
823 	struct iosys_map map;
824 
825 	map = __xe_lrc_regs_map(lrc);
826 	iosys_map_incr(&map, reg_nr * sizeof(u32));
827 	return xe_map_read32(xe, &map);
828 }
829 
830 void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
831 {
832 	struct xe_device *xe = lrc_to_xe(lrc);
833 	struct iosys_map map;
834 
835 	map = __xe_lrc_regs_map(lrc);
836 	iosys_map_incr(&map, reg_nr * sizeof(u32));
837 	xe_map_write32(xe, &map, val);
838 }
839 
840 static void *empty_lrc_data(struct xe_hw_engine *hwe)
841 {
842 	struct xe_gt *gt = hwe->gt;
843 	void *data;
844 	u32 *regs;
845 
846 	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
847 	if (!data)
848 		return NULL;
849 
850 	/* 1st page: per-process HW status page (PPHWSP) */
851 	regs = data + LRC_PPHWSP_SIZE;
852 	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
853 	set_context_control(regs, hwe);
854 	set_memory_based_intr(regs, hwe);
855 	reset_stop_ring(regs, hwe);
856 	if (xe_gt_has_indirect_ring_state(gt)) {
857 		regs = data + xe_gt_lrc_size(gt, hwe->class) -
858 		       LRC_INDIRECT_RING_STATE_SIZE;
859 		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
860 	}
861 
862 	return data;
863 }
864 
865 static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
866 {
867 	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
868 
869 	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
870 	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
871 }
872 
873 static void xe_lrc_finish(struct xe_lrc *lrc)
874 {
875 	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
876 	xe_bo_lock(lrc->bo, false);
877 	xe_bo_unpin(lrc->bo);
878 	xe_bo_unlock(lrc->bo);
879 	xe_bo_put(lrc->bo);
880 }
881 
882 #define PVC_CTX_ASID		(0x2e + 1)
883 #define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
884 
885 static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
886 		       struct xe_vm *vm, u32 ring_size, u16 msix_vec)
887 {
888 	struct xe_gt *gt = hwe->gt;
889 	struct xe_tile *tile = gt_to_tile(gt);
890 	struct xe_device *xe = gt_to_xe(gt);
891 	struct iosys_map map;
892 	void *init_data = NULL;
893 	u32 arb_enable;
894 	u32 lrc_size;
895 	int err;
896 
897 	kref_init(&lrc->refcount);
898 	lrc->flags = 0;
899 	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
900 	if (xe_gt_has_indirect_ring_state(gt))
901 		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
902 
903 	/*
904 	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
905 	 * via VM bind calls.
906 	 */
907 	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
908 				       ttm_bo_type_kernel,
909 				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
910 				       XE_BO_FLAG_GGTT |
911 				       XE_BO_FLAG_GGTT_INVALIDATE);
912 	if (IS_ERR(lrc->bo))
913 		return PTR_ERR(lrc->bo);
914 
915 	lrc->size = lrc_size;
916 	lrc->tile = gt_to_tile(hwe->gt);
917 	lrc->ring.size = ring_size;
918 	lrc->ring.tail = 0;
919 	lrc->ctx_timestamp = 0;
920 
921 	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
922 			     hwe->fence_irq, hwe->name);
923 
924 	if (!gt->default_lrc[hwe->class]) {
925 		init_data = empty_lrc_data(hwe);
926 		if (!init_data) {
927 			err = -ENOMEM;
928 			goto err_lrc_finish;
929 		}
930 	}
931 
932 	/*
933 	 * Initialize the per-process HW status page and the LRC / context
934 	 * state to known values
935 	 */
936 	map = __xe_lrc_pphwsp_map(lrc);
937 	if (!init_data) {
938 		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
939 		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
940 				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
941 				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
942 	} else {
943 		xe_map_memcpy_to(xe, &map, 0, init_data,
944 				 xe_gt_lrc_size(gt, hwe->class));
945 		kfree(init_data);
946 	}
947 
948 	if (vm) {
949 		xe_lrc_set_ppgtt(lrc, vm);
950 
951 		if (vm->xef)
952 			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
953 	}
954 
955 	if (xe_device_has_msix(xe)) {
956 		xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
957 				     xe_memirq_status_ptr(&tile->memirq, hwe));
958 		xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
959 				     xe_memirq_source_ptr(&tile->memirq, hwe));
960 		xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
961 	}
962 
963 	if (xe_gt_has_indirect_ring_state(gt)) {
964 		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
965 				     __xe_lrc_indirect_ring_ggtt_addr(lrc));
966 
967 		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
968 					      __xe_lrc_ring_ggtt_addr(lrc));
969 		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
970 		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
971 		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
972 		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
973 					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
974 	} else {
975 		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
976 		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
977 		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
978 		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
979 				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
980 	}
981 
982 	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
983 
984 	if (xe->info.has_asid && vm)
985 		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
986 
987 	lrc->desc = LRC_VALID;
988 	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
989 	/* TODO: Priority */
990 
991 	/* While this appears to have something about privileged batches or
992 	 * some such, it really just means PPGTT mode.
993 	 */
994 	if (vm)
995 		lrc->desc |= LRC_PRIVILEGE;
996 
997 	if (GRAPHICS_VERx100(xe) < 1250) {
998 		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
999 		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
1000 	}
1001 
1002 	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1003 	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
1004 
1005 	map = __xe_lrc_seqno_map(lrc);
1006 	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
1007 
1008 	map = __xe_lrc_start_seqno_map(lrc);
1009 	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
1010 
1011 	return 0;
1012 
1013 err_lrc_finish:
1014 	xe_lrc_finish(lrc);
1015 	return err;
1016 }
1017 
1018 /**
1019  * xe_lrc_create - Create an LRC
1020  * @hwe: Hardware Engine
1021  * @vm: The VM (address space)
1022  * @ring_size: LRC ring size
1023  * @msix_vec: MSI-X interrupt vector (for platforms that support it)
1024  *
1025  * Allocate and initialize the Logical Ring Context (LRC).
1026  *
1027  * Return: Pointer to the created LRC on success or an error pointer
1028  * on failure.
1029  */
1030 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
1031 			     u32 ring_size, u16 msix_vec)
1032 {
1033 	struct xe_lrc *lrc;
1034 	int err;
1035 
1036 	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
1037 	if (!lrc)
1038 		return ERR_PTR(-ENOMEM);
1039 
1040 	err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec);
1041 	if (err) {
1042 		kfree(lrc);
1043 		return ERR_PTR(err);
1044 	}
1045 
1046 	return lrc;
1047 }
1048 
1049 /**
1050  * xe_lrc_destroy - Destroy the LRC
1051  * @ref: reference to LRC
1052  *
1053  * Called when ref == 0; releases the resources held by the Logical Ring
1054  * Context (LRC) and frees the LRC memory.
1055  */
1056 void xe_lrc_destroy(struct kref *ref)
1057 {
1058 	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);
1059 
1060 	xe_lrc_finish(lrc);
1061 	kfree(lrc);
1062 }
1063 
1064 void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
1065 {
1066 	if (xe_lrc_has_indirect_ring_state(lrc))
1067 		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
1068 	else
1069 		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
1070 }
1071 
1072 u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
1073 {
1074 	if (xe_lrc_has_indirect_ring_state(lrc))
1075 		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
1076 	else
1077 		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
1078 }
1079 
1080 static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
1081 {
1082 	if (xe_lrc_has_indirect_ring_state(lrc))
1083 		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
1084 	else
1085 		return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
1086 }
1087 
1088 void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
1089 {
1090 	if (xe_lrc_has_indirect_ring_state(lrc))
1091 		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
1092 	else
1093 		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
1094 }
1095 
1096 u32 xe_lrc_ring_head(struct xe_lrc *lrc)
1097 {
1098 	if (xe_lrc_has_indirect_ring_state(lrc))
1099 		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
1100 	else
1101 		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
1102 }
1103 
1104 u32 xe_lrc_ring_space(struct xe_lrc *lrc)
1105 {
1106 	const u32 head = xe_lrc_ring_head(lrc);
1107 	const u32 tail = lrc->ring.tail;
1108 	const u32 size = lrc->ring.size;
1109 
1110 	return ((head - tail - 1) & (size - 1)) + 1;
1111 }
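/*
 * Worked example of the modular arithmetic above, assuming a 4K ring with
 * head = 0x100 and tail = 0x200:
 *
 *   ((0x100 - 0x200 - 1) & 0xfff) + 1 = 0xeff + 1 = 0xf00
 *
 * i.e. 0xf00 bytes may be written at tail before reaching head. When
 * head == tail the ring is idle and the full size is reported free.
 */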
1112 
1113 static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
1114 				const void *data, size_t size)
1115 {
1116 	struct xe_device *xe = lrc_to_xe(lrc);
1117 
1118 	iosys_map_incr(&ring, lrc->ring.tail);
1119 	xe_map_memcpy_to(xe, &ring, 0, data, size);
1120 	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
1121 }
1122 
1123 void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
1124 {
1125 	struct xe_device *xe = lrc_to_xe(lrc);
1126 	struct iosys_map ring;
1127 	u32 rhs;
1128 	size_t aligned_size;
1129 
1130 	xe_assert(xe, IS_ALIGNED(size, 4));
1131 	aligned_size = ALIGN(size, 8);
1132 
1133 	ring = __xe_lrc_ring_map(lrc);
1134 
1135 	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
1136 	rhs = lrc->ring.size - lrc->ring.tail;
1137 	if (size > rhs) {
1138 		__xe_lrc_write_ring(lrc, ring, data, rhs);
1139 		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
1140 	} else {
1141 		__xe_lrc_write_ring(lrc, ring, data, size);
1142 	}
1143 
1144 	if (aligned_size > size) {
1145 		u32 noop = MI_NOOP;
1146 
1147 		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
1148 	}
1149 }
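/*
 * Illustrative walk-through, assuming ring.size = 0x1000 and
 * ring.tail = 0xff8 with a 12-byte write: rhs = 8, so the first 8 bytes land
 * at 0xff8, the remaining 4 wrap to offset 0, and since aligned_size (16) is
 * greater than size (12) one MI_NOOP dword is appended, leaving
 * ring.tail = 0x8 and the tail qword-aligned.
 */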
1150 
1151 u64 xe_lrc_descriptor(struct xe_lrc *lrc)
1152 {
1153 	return lrc->desc | xe_lrc_ggtt_addr(lrc);
1154 }
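/*
 * Combining the LRC_* fields defined at the top of this file with the
 * page-aligned PPHWSP GGTT address, the descriptor is laid out roughly as:
 *
 *   [63:61] engine class      (only set before graphics version 12.50)
 *   [53:48] engine instance   (only set before graphics version 12.50)
 *   [31:12] PPHWSP GGTT address
 *   [8]     privilege, i.e. PPGTT mode
 *   [4:3]   addressing mode (LRC_LEGACY_64B_CONTEXT)
 *   [0]     valid
 */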
1155 
1156 u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
1157 {
1158 	return __xe_lrc_seqno_ggtt_addr(lrc);
1159 }
1160 
1161 /**
1162  * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
1163  *
1164  * Allocate but don't initialize an lrc seqno fence.
1165  *
1166  * Return: Pointer to the allocated fence or
1167  * negative error pointer on error.
1168  */
1169 struct dma_fence *xe_lrc_alloc_seqno_fence(void)
1170 {
1171 	return xe_hw_fence_alloc();
1172 }
1173 
1174 /**
1175  * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
1176  * @fence: Pointer to the fence to free.
1177  *
1178  * Frees an lrc seqno fence that hasn't yet been
1179  * initialized.
1180  */
1181 void xe_lrc_free_seqno_fence(struct dma_fence *fence)
1182 {
1183 	xe_hw_fence_free(fence);
1184 }
1185 
1186 /**
1187  * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
1188  * @lrc: Pointer to the lrc.
1189  * @fence: Pointer to the fence to initialize.
1190  *
1191  * Initializes a pre-allocated lrc seqno fence.
1192  * After initialization, the fence is subject to normal
1193  * dma-fence refcounting.
1194  */
1195 void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
1196 {
1197 	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
1198 }
1199 
1200 s32 xe_lrc_seqno(struct xe_lrc *lrc)
1201 {
1202 	struct iosys_map map = __xe_lrc_seqno_map(lrc);
1203 
1204 	return xe_map_read32(lrc_to_xe(lrc), &map);
1205 }
1206 
1207 s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
1208 {
1209 	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
1210 
1211 	return xe_map_read32(lrc_to_xe(lrc), &map);
1212 }
1213 
1214 u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
1215 {
1216 	return __xe_lrc_start_seqno_ggtt_addr(lrc);
1217 }
1218 
1219 u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
1220 {
1221 	return __xe_lrc_parallel_ggtt_addr(lrc);
1222 }
1223 
1224 struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
1225 {
1226 	return __xe_lrc_parallel_map(lrc);
1227 }
1228 
1229 static int instr_dw(u32 cmd_header)
1230 {
1231 	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
1232 	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
1233 	    GFXPIPE_SINGLE_DW_CMD(0, 0))
1234 		return 1;
1235 
1236 	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
1237 	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
1238 		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
1239 
1240 	/* Most instructions have the # of dwords (minus 2) in 7:0 */
1241 	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
1242 }
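/*
 * For example, per the rules above: a GFXPIPE header matching
 * GFXPIPE_SINGLE_DW_CMD(0, 0) is 1 dword, while a header whose
 * XE_INSTR_LEN_MASK field reads 5 decodes to 5 + 2 = 7 dwords.
 */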
1243 
1244 static int dump_mi_command(struct drm_printer *p,
1245 			   struct xe_gt *gt,
1246 			   u32 *dw,
1247 			   int remaining_dw)
1248 {
1249 	u32 inst_header = *dw;
1250 	u32 numdw = instr_dw(inst_header);
1251 	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
1252 	int num_noop;
1253 
1254 	/* First check for commands that don't have/use a '# DW' field */
1255 	switch (inst_header & MI_OPCODE) {
1256 	case MI_NOOP:
1257 		num_noop = 1;
1258 		while (num_noop < remaining_dw &&
1259 		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
1260 			num_noop++;
1261 		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
1262 		return num_noop;
1263 
1264 	case MI_TOPOLOGY_FILTER:
1265 		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
1266 		return 1;
1267 
1268 	case MI_BATCH_BUFFER_END:
1269 		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
1270 		/* Return 'remaining_dw' to consume the rest of the LRC */
1271 		return remaining_dw;
1272 	}
1273 
1274 	/*
1275 	 * Any remaining commands include a # of dwords.  We should make sure
1276 	 * it doesn't exceed the remaining size of the LRC.
1277 	 */
1278 	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1279 		numdw = remaining_dw;
1280 
1281 	switch (inst_header & MI_OPCODE) {
1282 	case MI_LOAD_REGISTER_IMM:
1283 		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
1284 			   inst_header, (numdw - 1) / 2);
1285 		for (int i = 1; i < numdw; i += 2)
1286 			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
1287 		return numdw;
1288 
1289 	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
1290 		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
1291 			   inst_header,
1292 			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
1293 			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
1294 		if (numdw == 4)
1295 			drm_printf(p, " - %#6x = %#010llx\n",
1296 				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
1297 		else
1298 			drm_printf(p, " - %*ph (%s)\n",
1299 				   (int)sizeof(u32) * (numdw - 1), dw + 1,
1300 				   numdw < 4 ? "truncated" : "malformed");
1301 		return numdw;
1302 
1303 	case MI_FORCE_WAKEUP:
1304 		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
1305 		return numdw;
1306 
1307 	default:
1308 		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
1309 			   inst_header, opcode, numdw);
1310 		return numdw;
1311 	}
1312 }
1313 
1314 static int dump_gfxpipe_command(struct drm_printer *p,
1315 				struct xe_gt *gt,
1316 				u32 *dw,
1317 				int remaining_dw)
1318 {
1319 	u32 numdw = instr_dw(*dw);
1320 	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
1321 	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
1322 	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
1323 
1324 	/*
1325 	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
1326 	 * remaining size of the LRC.
1327 	 */
1328 	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1329 		numdw = remaining_dw;
1330 
1331 	switch (*dw & GFXPIPE_MATCH_MASK) {
1332 #define MATCH(cmd) \
1333 	case cmd: \
1334 		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
1335 		return numdw
1336 #define MATCH3D(cmd) \
1337 	case CMD_##cmd: \
1338 		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
1339 		return numdw
1340 
1341 	MATCH(STATE_BASE_ADDRESS);
1342 	MATCH(STATE_SIP);
1343 	MATCH(GPGPU_CSR_BASE_ADDRESS);
1344 	MATCH(STATE_COMPUTE_MODE);
1345 	MATCH3D(3DSTATE_BTD);
1346 	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
1347 	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);
1348 
1349 	MATCH3D(3DSTATE_VF_STATISTICS);
1350 
1351 	MATCH(PIPELINE_SELECT);
1352 
1353 	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
1354 	MATCH3D(3DSTATE_CLEAR_PARAMS);
1355 	MATCH3D(3DSTATE_DEPTH_BUFFER);
1356 	MATCH3D(3DSTATE_STENCIL_BUFFER);
1357 	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
1358 	MATCH3D(3DSTATE_VERTEX_BUFFERS);
1359 	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
1360 	MATCH3D(3DSTATE_INDEX_BUFFER);
1361 	MATCH3D(3DSTATE_VF);
1362 	MATCH3D(3DSTATE_MULTISAMPLE);
1363 	MATCH3D(3DSTATE_CC_STATE_POINTERS);
1364 	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
1365 	MATCH3D(3DSTATE_VS);
1366 	MATCH3D(3DSTATE_GS);
1367 	MATCH3D(3DSTATE_CLIP);
1368 	MATCH3D(3DSTATE_SF);
1369 	MATCH3D(3DSTATE_WM);
1370 	MATCH3D(3DSTATE_CONSTANT_VS);
1371 	MATCH3D(3DSTATE_CONSTANT_GS);
1372 	MATCH3D(3DSTATE_CONSTANT_PS);
1373 	MATCH3D(3DSTATE_SAMPLE_MASK);
1374 	MATCH3D(3DSTATE_CONSTANT_HS);
1375 	MATCH3D(3DSTATE_CONSTANT_DS);
1376 	MATCH3D(3DSTATE_HS);
1377 	MATCH3D(3DSTATE_TE);
1378 	MATCH3D(3DSTATE_DS);
1379 	MATCH3D(3DSTATE_STREAMOUT);
1380 	MATCH3D(3DSTATE_SBE);
1381 	MATCH3D(3DSTATE_PS);
1382 	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
1383 	MATCH3D(3DSTATE_CPS_POINTERS);
1384 	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
1385 	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
1386 	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
1387 	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
1388 	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
1389 	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
1390 	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
1391 	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
1392 	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
1393 	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
1394 	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
1395 	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
1396 	MATCH3D(3DSTATE_VF_INSTANCING);
1397 	MATCH3D(3DSTATE_VF_SGVS);
1398 	MATCH3D(3DSTATE_VF_TOPOLOGY);
1399 	MATCH3D(3DSTATE_WM_CHROMAKEY);
1400 	MATCH3D(3DSTATE_PS_BLEND);
1401 	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
1402 	MATCH3D(3DSTATE_PS_EXTRA);
1403 	MATCH3D(3DSTATE_RASTER);
1404 	MATCH3D(3DSTATE_SBE_SWIZ);
1405 	MATCH3D(3DSTATE_WM_HZ_OP);
1406 	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
1407 	MATCH3D(3DSTATE_VF_SGVS_2);
1408 	MATCH3D(3DSTATE_VFG);
1409 	MATCH3D(3DSTATE_URB_ALLOC_VS);
1410 	MATCH3D(3DSTATE_URB_ALLOC_HS);
1411 	MATCH3D(3DSTATE_URB_ALLOC_DS);
1412 	MATCH3D(3DSTATE_URB_ALLOC_GS);
1413 	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
1414 	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
1415 	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
1416 	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
1417 	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
1418 	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
1419 	MATCH3D(3DSTATE_AMFS);
1420 	MATCH3D(3DSTATE_DEPTH_BOUNDS);
1421 	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
1422 	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
1423 	MATCH3D(3DSTATE_MESH_CONTROL);
1424 	MATCH3D(3DSTATE_MESH_DISTRIB);
1425 	MATCH3D(3DSTATE_TASK_REDISTRIB);
1426 	MATCH3D(3DSTATE_MESH_SHADER);
1427 	MATCH3D(3DSTATE_MESH_SHADER_DATA);
1428 	MATCH3D(3DSTATE_TASK_CONTROL);
1429 	MATCH3D(3DSTATE_TASK_SHADER);
1430 	MATCH3D(3DSTATE_TASK_SHADER_DATA);
1431 	MATCH3D(3DSTATE_URB_ALLOC_MESH);
1432 	MATCH3D(3DSTATE_URB_ALLOC_TASK);
1433 	MATCH3D(3DSTATE_CLIP_MESH);
1434 	MATCH3D(3DSTATE_SBE_MESH);
1435 	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
1436 
1437 	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
1438 	MATCH3D(3DSTATE_CHROMA_KEY);
1439 	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
1440 	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
1441 	MATCH3D(3DSTATE_LINE_STIPPLE);
1442 	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
1443 	MATCH3D(3DSTATE_MONOFILTER_SIZE);
1444 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
1445 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
1446 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
1447 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
1448 	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
1449 	MATCH3D(3DSTATE_SO_DECL_LIST);
1450 	MATCH3D(3DSTATE_SO_BUFFER);
1451 	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
1452 	MATCH3D(3DSTATE_SAMPLE_PATTERN);
1453 	MATCH3D(3DSTATE_3D_MODE);
1454 	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
1455 	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
1456 	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
1457 
1458 	default:
1459 		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
1460 			   *dw, pipeline, opcode, subopcode, numdw);
1461 		return numdw;
1462 	}
1463 }
1464 
1465 static int dump_gfx_state_command(struct drm_printer *p,
1466 				  struct xe_gt *gt,
1467 				  u32 *dw,
1468 				  int remaining_dw)
1469 {
1470 	u32 numdw = instr_dw(*dw);
1471 	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
1472 
1473 	/*
1474 	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
1475 	 * remaining size of the LRC.
1476 	 */
1477 	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1478 		numdw = remaining_dw;
1479 
1480 	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
1481 	MATCH(STATE_WRITE_INLINE);
1482 
1483 	default:
1484 		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
1485 			   *dw, opcode, numdw);
1486 		return numdw;
1487 	}
1488 }
1489 
1490 void xe_lrc_dump_default(struct drm_printer *p,
1491 			 struct xe_gt *gt,
1492 			 enum xe_engine_class hwe_class)
1493 {
1494 	u32 *dw;
1495 	int remaining_dw, num_dw;
1496 
1497 	if (!gt->default_lrc[hwe_class]) {
1498 		drm_printf(p, "No default LRC for class %d\n", hwe_class);
1499 		return;
1500 	}
1501 
1502 	/*
1503 	 * Skip the beginning of the LRC since it contains the per-process
1504 	 * hardware status page.
1505 	 */
1506 	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
1507 	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
1508 
1509 	while (remaining_dw > 0) {
1510 		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
1511 			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
1512 		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
1513 			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
1514 		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
1515 			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
1516 		} else {
1517 			num_dw = min(instr_dw(*dw), remaining_dw);
1518 			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
1519 				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
1520 				   num_dw);
1521 		}
1522 
1523 		dw += num_dw;
1524 		remaining_dw -= num_dw;
1525 	}
1526 }
1527 
1528 struct instr_state {
1529 	u32 instr;
1530 	u16 num_dw;
1531 };
1532 
1533 static const struct instr_state xe_hpg_svg_state[] = {
1534 	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
1535 	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
1536 	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
1537 	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
1538 	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
1539 	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
1540 	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
1541 	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
1542 	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
1543 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
1544 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
1545 	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
1546 	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
1547 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
1548 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
1549 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
1550 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
1551 	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
1552 	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
1553 	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
1554 	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
1555 	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
1556 	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
1557 	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
1558 	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
1559 	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
1560 	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
1561 	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
1562 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
1563 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
1564 	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
1565 	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
1566 	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
1567 	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
1568 	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
1569 	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
1570 	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
1571 	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
1572 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
1573 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
1574 	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
1575 	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
1576 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
1577 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
1578 	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
1579 	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
1580 	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
1581 	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
1582 	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
1583 	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
1584 };
1585 
1586 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
1587 {
1588 	struct xe_gt *gt = q->hwe->gt;
1589 	struct xe_device *xe = gt_to_xe(gt);
1590 	const struct instr_state *state_table = NULL;
1591 	int state_table_size = 0;
1592 
1593 	/*
1594 	 * Wa_14019789679
1595 	 *
1596 	 * If the driver doesn't explicitly emit the SVG instructions while
1597 	 * setting up the default LRC, the context switch will write 0's
1598 	 * (noops) into the LRC memory rather than the expected instruction
1599 	 * headers.  Application contexts start out as a copy of the default
1600 	 * LRC, and if they also do not emit specific settings for some SVG
1601 	 * state, then on context restore they'll unintentionally inherit
1602 	 * whatever state setting the previous context had programmed into the
1603 	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
1604 	 * prevent the hardware from resetting that state back to any specific
1605 	 * value).
1606 	 *
1607 	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
1608 	 * since that's a specific state setting that can easily cause GPU
1609 	 * hangs if unintentionally inherited.  However to be safe we'll
1610 	 * continue to emit all of the SVG state since it's best not to leak
1611 	 * any of the state between contexts, even if that leakage is harmless.
1612 	 */
1613 	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
1614 		state_table = xe_hpg_svg_state;
1615 		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
1616 	}
1617 
1618 	if (!state_table) {
1619 		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
1620 			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
1621 		return;
1622 	}
1623 
1624 	for (int i = 0; i < state_table_size; i++) {
1625 		u32 instr = state_table[i].instr;
1626 		u16 num_dw = state_table[i].num_dw;
1627 		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
1628 
1629 		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
1630 		xe_gt_assert(gt, num_dw != 0);
1631 		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
1632 
1633 		/*
1634 		 * Xe2's SVG context is the same as the one on DG2 / MTL
1635 		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
1636 		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
1637 		 * Just make the replacement here rather than defining a
1638 		 * whole separate table for the single trivial change.
1639 		 */
1640 		if (GRAPHICS_VER(xe) >= 20 &&
1641 		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
1642 			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
1643 
1644 		bb->cs[bb->len] = instr;
1645 		if (!is_single_dw)
1646 			bb->cs[bb->len] |= (num_dw - 2);
1647 
1648 		bb->len += num_dw;
1649 	}
1650 }
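/*
 * As a sketch of the loop above: the { CMD_3DSTATE_VS, 9 } table entry
 * writes CMD_3DSTATE_VS | (9 - 2) into the batch and advances bb->len by 9,
 * leaving the 8 payload dwords zeroed, which is all the workaround needs.
 */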
1651 
1652 struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
1653 {
1654 	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
1655 
1656 	if (!snapshot)
1657 		return NULL;
1658 
1659 	if (lrc->bo->vm)
1660 		xe_vm_get(lrc->bo->vm);
1661 
1662 	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
1663 	snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
1664 	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
1665 	snapshot->head = xe_lrc_ring_head(lrc);
1666 	snapshot->tail.internal = lrc->ring.tail;
1667 	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
1668 	snapshot->start = xe_lrc_ring_start(lrc);
1669 	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
1670 	snapshot->seqno = xe_lrc_seqno(lrc);
1671 	snapshot->lrc_bo = xe_bo_get(lrc->bo);
1672 	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
1673 	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
1674 	snapshot->lrc_snapshot = NULL;
1675 	snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
1676 	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
1677 	return snapshot;
1678 }
1679 
1680 void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
1681 {
1682 	struct xe_bo *bo;
1683 	struct xe_vm *vm;
1684 	struct iosys_map src;
1685 
1686 	if (!snapshot)
1687 		return;
1688 
1689 	bo = snapshot->lrc_bo;
1690 	vm = bo->vm;
1691 	snapshot->lrc_bo = NULL;
1692 
1693 	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
1694 	if (!snapshot->lrc_snapshot)
1695 		goto put_bo;
1696 
1697 	xe_bo_lock(bo, false);
1698 	if (!ttm_bo_vmap(&bo->ttm, &src)) {
1699 		xe_map_memcpy_from(xe_bo_device(bo),
1700 				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
1701 				   snapshot->lrc_size);
1702 		ttm_bo_vunmap(&bo->ttm, &src);
1703 	} else {
1704 		kvfree(snapshot->lrc_snapshot);
1705 		snapshot->lrc_snapshot = NULL;
1706 	}
1707 	xe_bo_unlock(bo);
1708 put_bo:
1709 	xe_bo_put(bo);
1710 	if (vm)
1711 		xe_vm_put(vm);
1712 }
1713 
1714 void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
1715 {
1716 	unsigned long i;
1717 
1718 	if (!snapshot)
1719 		return;
1720 
1721 	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
1722 	drm_printf(p, "\tHW Ring address: 0x%08x\n",
1723 		   snapshot->ring_addr);
1724 	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
1725 		   snapshot->indirect_context_desc);
1726 	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
1727 	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
1728 		   snapshot->tail.internal, snapshot->tail.memory);
1729 	drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
1730 	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
1731 	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
1732 	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
1733 	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
1734 
1735 	if (!snapshot->lrc_snapshot)
1736 		return;
1737 
1738 	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
1739 	drm_puts(p, "\t[HWSP].data: ");
1740 	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
1741 		u32 *val = snapshot->lrc_snapshot + i;
1742 		char dumped[ASCII85_BUFSZ];
1743 
1744 		drm_puts(p, ascii85_encode(*val, dumped));
1745 	}
1746 
1747 	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
1748 	drm_puts(p, "\t[HWCTX].data: ");
1749 	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
1750 		u32 *val = snapshot->lrc_snapshot + i;
1751 		char dumped[ASCII85_BUFSZ];
1752 
1753 		drm_puts(p, ascii85_encode(*val, dumped));
1754 	}
1755 	drm_puts(p, "\n");
1756 }
1757 
1758 void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
1759 {
1760 	if (!snapshot)
1761 		return;
1762 
1763 	kvfree(snapshot->lrc_snapshot);
1764 	if (snapshot->lrc_bo) {
1765 		struct xe_vm *vm;
1766 
1767 		vm = snapshot->lrc_bo->vm;
1768 		xe_bo_put(snapshot->lrc_bo);
1769 		if (vm)
1770 			xe_vm_put(vm);
1771 	}
1772 	kfree(snapshot);
1773 }
1774 
1775 /**
1776  * xe_lrc_update_timestamp() - Update ctx timestamp
1777  * @lrc: Pointer to the lrc.
1778  * @old_ts: Old timestamp value
1779  *
1780  * Populate @old_ts with the current saved ctx timestamp, read the new ctx
1781  * timestamp and update the saved value.
1782  *
1783  * Returns: New ctx timestamp value
1784  */
1785 u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
1786 {
1787 	*old_ts = lrc->ctx_timestamp;
1788 
1789 	lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
1790 
1791 	trace_xe_lrc_update_timestamp(lrc, *old_ts);
1792 
1793 	return lrc->ctx_timestamp;
1794 }
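/*
 * A hypothetical caller accumulating busy time would rely on u32 wraparound
 * arithmetic, e.g.:
 *
 *   u32 old_ts;
 *   u32 new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
 *
 *   total_ticks += new_ts - old_ts;
 */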
1795 
1796 /**
1797  * xe_lrc_ring_is_idle() - Check whether the LRC ring is idle
1798  * @lrc: Pointer to the lrc.
1799  *
1800  * Compare LRC ring head and tail to determine if idle.
1801  *
1802  * Return: True if the ring is idle, False otherwise
1803  */
1804 bool xe_lrc_ring_is_idle(struct xe_lrc *lrc)
1805 {
1806 	return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc);
1807 }
1808