// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
#include "xe_wa.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_PPHWSP_SIZE				SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K
#define LRC_WA_BB_SIZE				SZ_4K

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	/* Per-process HW status page (PPHWSP) */
	size = LRC_PPHWSP_SIZE;

	/* Engine context image */
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size += 3 * SZ_4K;
		else
			size += 13 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		if (GRAPHICS_VER(xe) >= 20)
			size += 2 * SZ_4K;
		else
			size += 13 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size += 1 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}
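
/*
 * Worked example of the sizing above: on an Xe2 GT with indirect ring state,
 * a render-class image is 4K (PPHWSP) + 3 * 4K (engine context) +
 * 4K (indirect ring state) = 20K, while a copy-class image is
 * 4K + 4K + 4K = 12K.
 */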

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * register offsets and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allow setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to set values for in the
 *        case of MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command, "count"
 * registers at a time. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for offsets
 * bigger than that. Those macros already set all the bits documented below
 * correctly:
 *
 * [7]: when a register offset needs more than 6 bits, use additional bytes,
 *      which follow, for the lower bits
 * [6:0]: register offset, without considering the engine base
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}
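
/*
 * Worked example of the encoding above: REG16(0x2b4) emits the two bytes
 * 0x81, 0x2d. The decode loop in set_offsets() reads 0x81, keeps the low
 * seven bits (0x01) and continues because BIT(7) is set, then reads 0x2d,
 * giving offset = (0x01 << 7) | 0x2d = 0xad, so the dword written into the
 * context image is base + (0xad << 2) = base + 0x2b4.
 */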

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x3b] */
	LRI(1, 0),              /* [0x41] */
	REG(0x0c8),             /* [0x42] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),                 /* [0x00] */
	LRI(5, POSTED),         /* [0x01] */
	REG(0x034),             /* [0x02] RING_BUFFER_HEAD */
	REG(0x030),             /* [0x04] RING_BUFFER_TAIL */
	REG(0x038),             /* [0x06] RING_BUFFER_START */
	REG(0x048),             /* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),                 /* [0x0c] */
	LRI(9, POSTED),         /* [0x11] */
	REG(0x168),             /* [0x12] BB_ADDR_UDW */
	REG(0x140),             /* [0x14] BB_ADDR */
	REG(0x110),             /* [0x16] BB_STATE */
	REG16(0x588),           /* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1a] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1c] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1e] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x22] BB_STACK_WRITE_PORT */

	NOP(12),                /* [0x24] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);
	u8 num_regs;

	if (!xe_device_uses_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	num_regs = xe_device_has_msix(xe) ? 3 : 2;
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);

	if (xe_device_has_msix(xe)) {
		regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
		/* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
	}
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
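
	/*
	 * MI_MODE is a "masked" register: the upper 16 bits of the value
	 * select which of the lower 16 bits are actually written.
	 * lrc_ring_mi_mode() returns the dword index at which the MI_MODE
	 * register offset lives in the context image, so regs[x + 1] is the
	 * register's value; clearing STOP_RING while setting STOP_RING << 16
	 * makes the context restore explicitly write 0 to STOP_RING.
	 */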
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096

u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}
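
/*
 * Resulting layout of the LRC BO, per the offset helpers above and
 * wa_bb_offset() below:
 *
 *   [0]                                         ring buffer
 *                                               (lrc->ring.size bytes)
 *   [xe_lrc_pphwsp_offset()]                    PPHWSP, with seqnos etc. in
 *                                               its driver-defined portion
 *   [xe_lrc_regs_offset()]                      context registers / engine
 *                                               context image
 *   [lrc->size - LRC_INDIRECT_RING_STATE_SIZE]  indirect ring state page,
 *                                               if supported
 *   [bo->size - LRC_WA_BB_SIZE]                 workaround batch buffer
 */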

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* This is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel data is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
DECL_MAP_ADDR_HELPERS(engine_id)

#undef DECL_MAP_ADDR_HELPERS
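
/*
 * For each element above, DECL_MAP_ADDR_HELPERS() expands to a pair of
 * helpers; e.g. for "seqno" it yields __xe_lrc_seqno_map(), an iosys_map
 * pointing at the seqno slot inside the BO, and __xe_lrc_seqno_ggtt_addr(),
 * the GGTT address of that same slot.
 */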

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp udw GGTT address
 */
u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;
	u32 ldw, udw = 0;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	ldw = xe_map_read32(xe, &map);

	if (xe->info.has_64bit_timestamp) {
		map = __xe_lrc_ctx_timestamp_udw_map(lrc);
		udw = xe_map_read32(xe, &map);
	}

	return (u64)udw << 32 | ldw;
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: per-process HW status page (PPHWSP) */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_unpin_map_no_vm(lrc->bo);
}

static size_t wa_bb_offset(struct xe_lrc *lrc)
{
	return lrc->bo->size - LRC_WA_BB_SIZE;
}

/*
 * xe_lrc_setup_utilization() - Set up the WA BB to assist in calculating
 * active context run ticks.
 * @lrc: Pointer to the lrc.
 *
 * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
 * context, but only gets updated when the context switches out. In order to
 * check how long a context has been active before it switches out, two things
 * are required:
 *
 * (1) Determine if the context is running:
 * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in
 * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is
 * initialized. During a query, we just check for this value to determine if the
 * context is active. If the context switched out, it would overwrite this
 * location with the actual CTX_TIMESTAMP MMIO value. Note that the WA BB runs
 * as the last part of context restore, so reusing this LRC location will not
 * clobber anything.
 *
 * (2) Calculate the time that the context has been active for:
 * The CTX_TIMESTAMP ticks only when the context is active. If a context is
 * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization.
 * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific
 * engine instance. Since we do not know which instance the context is running
 * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and
 * store it in the PPHWSP.
 */
#define CONTEXT_ACTIVE 1ULL
static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
{
	const size_t max_size = LRC_WA_BB_SIZE;
	u32 *cmd, *buf = NULL;

	if (lrc->bo->vmap.is_iomem) {
		buf = kmalloc(max_size, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;
		cmd = buf;
	} else {
		cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc);
	}

	*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
	*cmd++ = ENGINE_ID(0).addr;
	*cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc);
	*cmd++ = 0;

	*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
	*cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
	*cmd++ = 0;
	*cmd++ = lower_32_bits(CONTEXT_ACTIVE);

	if (lrc_to_xe(lrc)->info.has_64bit_timestamp) {
		*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
		*cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
		*cmd++ = 0;
		*cmd++ = upper_32_bits(CONTEXT_ACTIVE);
	}

	*cmd++ = MI_BATCH_BUFFER_END;

	if (buf) {
		xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap,
				 wa_bb_offset(lrc), buf,
				 (cmd - buf) * sizeof(*cmd));
		kfree(buf);
	}

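	/*
	 * Bit 0 of BB_PER_CTX_PTR marks the per-context batch pointer as
	 * valid, hence the "+ 1" below (mirroring PER_CTX_BB_VALID on i915);
	 * the WA BB itself lives in the last LRC_WA_BB_SIZE bytes of the BO.
	 */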
	xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) +
			     wa_bb_offset(lrc) + 1);

	return 0;
}
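
/*
 * A sketch (names illustrative only) of how a busyness query is expected to
 * consume the state set up above, per the comment on
 * xe_lrc_setup_utilization():
 *
 *	if (xe_lrc_ctx_timestamp(lrc) == CONTEXT_ACTIVE)
 *		ticks = read_engine_ctx_timestamp_mmio(lrc);	// still running
 *	else
 *		ticks = xe_lrc_ctx_timestamp(lrc);		// switched out
 */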

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size, u16 msix_vec,
		       u32 init_flags)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	u32 bo_flags;
	int err;

	kref_init(&lrc->refcount);
	lrc->gt = gt;
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
		   XE_BO_FLAG_GGTT_INVALIDATE;
	if (vm && vm->xef) /* userspace */
		bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, NULL,
				       lrc_size + LRC_WA_BB_SIZE,
				       ttm_bo_type_kernel,
				       bo_flags);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init the per-process HW status page (PPHWSP) and the LRC / context
	 * state to known values.
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_device_has_msix(xe)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
				     xe_memirq_status_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
				     xe_memirq_source_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	if (init_flags & XE_LRC_CREATE_RUNALONE)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_RUN_ALONE));

	if (init_flags & XE_LRC_CREATE_PXP)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));

	lrc->ctx_timestamp = 0;
	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
	if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
		xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/*
	 * While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	err = xe_lrc_setup_utilization(lrc);
	if (err)
		goto err_lrc_finish;

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 * @msix_vec: MSI-X interrupt vector (for platforms that support it)
 * @flags: LRC initialization flags
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return: Pointer to the created LRC on success, or an error pointer
 * on failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size, u16 msix_vec, u32 flags)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}
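
/*
 * Example of the power-of-two ring math above: with size = 0x4000,
 * head = 0x100 and tail = 0x180, ((0x100 - 0x180 - 1) & 0x3fff) + 1 =
 * 0x3f80 bytes are free; when head == tail the ring is empty and the full
 * 0x4000 bytes are reported.
 */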

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}
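
/*
 * Note the split in xe_lrc_write_ring() above: a payload that would run past
 * the end of the ring is copied in two chunks so the write wraps to offset 0,
 * and a trailing MI_NOOP pads 4-byte-aligned sizes out to 8-byte alignment.
 */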

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

/**
 * xe_lrc_engine_id() - Read engine id value
 * @lrc: Pointer to the lrc.
 *
 * Returns: engine id value
 */
static u32 xe_lrc_engine_id(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_engine_id_map(lrc);
	return xe_map_read32(xe, &map);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}
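
/*
 * For example, a header whose XE_INSTR_LEN_MASK field reads 2 decodes to a
 * 4-dword instruction, while GFXPIPE "SINGLE_DW" opcodes carry no length
 * field and are always exactly one dword.
 */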

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_CONSTANT_PS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
	MATCH3D(3DSTATE_COARSE_PIXEL);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}
1679 
1680 struct instr_state {
1681 	u32 instr;
1682 	u16 num_dw;
1683 };
1684 
1685 static const struct instr_state xe_hpg_svg_state[] = {
1686 	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
1687 	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
1688 	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
1689 	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
1690 	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
1691 	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
1692 	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
1693 	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
1694 	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
1695 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
1696 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
1697 	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
1698 	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
1699 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
1700 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
1701 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
1702 	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
1703 	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
1704 	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
1705 	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
1706 	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
1707 	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
1708 	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
1709 	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
1710 	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
1711 	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
1712 	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
1713 	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
1714 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
1715 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
1716 	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
1717 	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
1718 	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
1719 	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
1720 	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
1721 	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
1722 	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
1723 	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
1724 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
1725 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
1726 	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
1727 	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
1728 	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
1729 	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
1730 	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
1731 	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
1732 	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
1733 	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
1734 	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
1735 	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
1736 };
1737 
xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue * q,struct xe_bb * bb)1738 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
1739 {
1740 	struct xe_gt *gt = q->hwe->gt;
1741 	struct xe_device *xe = gt_to_xe(gt);
1742 	const struct instr_state *state_table = NULL;
1743 	int state_table_size = 0;
1744 
1745 	/*
1746 	 * Wa_14019789679
1747 	 *
1748 	 * If the driver doesn't explicitly emit the SVG instructions while
1749 	 * setting up the default LRC, the context switch will write 0's
1750 	 * (noops) into the LRC memory rather than the expected instruction
1751 	 * headers.  Application contexts start out as a copy of the default
1752 	 * LRC, and if they also do not emit specific settings for some SVG
1753 	 * state, then on context restore they'll unintentionally inherit
1754 	 * whatever state setting the previous context had programmed into the
1755 	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
1756 	 * prevent the hardware from resetting that state back to any specific
1757 	 * value).
1758 	 *
1759 	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
1760 	 * since that's a specific state setting that can easily cause GPU
1761 	 * hangs if unintentionally inherited.  However, to be safe, we'll
1762 	 * continue to emit all of the SVG state since it's best not to leak
1763 	 * any of the state between contexts, even if that leakage is harmless.
1764 	 */
1765 	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
1766 		state_table = xe_hpg_svg_state;
1767 		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
1768 	}
1769 
1770 	if (!state_table) {
1771 		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
1772 			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
1773 		return;
1774 	}
1775 
1776 	for (int i = 0; i < state_table_size; i++) {
1777 		u32 instr = state_table[i].instr;
1778 		u16 num_dw = state_table[i].num_dw;
1779 		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
1780 
1781 		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
1782 		xe_gt_assert(gt, num_dw != 0);
1783 		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
1784 
1785 		/*
1786 		 * Xe2's SVG context is the same as the one on DG2 / MTL
1787 		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
1788 		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
1789 		 * Just make the replacement here rather than defining a
1790 		 * whole separate table for the single trivial change.
1791 		 */
1792 		if (GRAPHICS_VER(xe) >= 20 &&
1793 		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
1794 			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
1795 
1796 		bb->cs[bb->len] = instr;
1797 		if (!is_single_dw)
1798 			bb->cs[bb->len] |= (num_dw - 2);
1799 
1800 		bb->len += num_dw;
1801 	}
1802 }
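
/*
 * Illustrative sketch (not part of the driver): for multi-dword GFXPIPE
 * instructions the header's "DWord Length" field holds the total packet
 * length minus two, which is why the loop above ORs (num_dw - 2) into the
 * header before advancing bb->len. An 11-dword CMD_3DSTATE_CONSTANT_VS
 * packet would thus be emitted as:
 *
 *	bb->cs[bb->len] = CMD_3DSTATE_CONSTANT_VS | (11 - 2);
 *	bb->len += 11;	// payload dwords are left zeroed
 *
 * Single-dword instructions (PIPELINE_SINGLE_DW) carry no length field,
 * hence the is_single_dw special case in the loop.
 */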
1803 
1804 struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
1805 {
1806 	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
1807 
1808 	if (!snapshot)
1809 		return NULL;
1810 
1811 	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
1812 	snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
1813 	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
1814 	snapshot->head = xe_lrc_ring_head(lrc);
1815 	snapshot->tail.internal = lrc->ring.tail;
1816 	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
1817 	snapshot->start = xe_lrc_ring_start(lrc);
1818 	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
1819 	snapshot->seqno = xe_lrc_seqno(lrc);
1820 	snapshot->lrc_bo = xe_bo_get(lrc->bo);
1821 	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
1822 	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset -
1823 		LRC_WA_BB_SIZE;
1824 	snapshot->lrc_snapshot = NULL;
1825 	snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
1826 	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
1827 	return snapshot;
1828 }
1829 
1830 void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
1831 {
1832 	struct xe_bo *bo;
1833 	struct iosys_map src;
1834 
1835 	if (!snapshot)
1836 		return;
1837 
1838 	bo = snapshot->lrc_bo;
1839 	snapshot->lrc_bo = NULL;
1840 
1841 	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
1842 	if (!snapshot->lrc_snapshot)
1843 		goto put_bo;
1844 
1845 	xe_bo_lock(bo, false);
1846 	if (!ttm_bo_vmap(&bo->ttm, &src)) {
1847 		xe_map_memcpy_from(xe_bo_device(bo),
1848 				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
1849 				   snapshot->lrc_size);
1850 		ttm_bo_vunmap(&bo->ttm, &src);
1851 	} else {
1852 		kvfree(snapshot->lrc_snapshot);
1853 		snapshot->lrc_snapshot = NULL;
1854 	}
1855 	xe_bo_unlock(bo);
1856 put_bo:
1857 	xe_bo_put(bo);
1858 }
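
/*
 * A minimal usage sketch (assumed caller, not taken from this file): the
 * snapshot is split into two phases so that the first can run in atomic
 * context (GFP_NOWAIT, register reads and a BO reference only), while the
 * expensive vmap and copy above run later from a context that may sleep:
 *
 *	struct xe_lrc_snapshot *s;
 *
 *	s = xe_lrc_snapshot_capture(lrc);	// atomic-safe
 *	...
 *	xe_lrc_snapshot_capture_delayed(s);	// may sleep: vmap + memcpy
 *	xe_lrc_snapshot_print(s, p);
 *	xe_lrc_snapshot_free(s);
 */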
1859 
1860 void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
1861 {
1862 	unsigned long i;
1863 
1864 	if (!snapshot)
1865 		return;
1866 
1867 	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
1868 	drm_printf(p, "\tHW Ring address: 0x%08x\n",
1869 		   snapshot->ring_addr);
1870 	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
1871 		   snapshot->indirect_context_desc);
1872 	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
1873 	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
1874 		   snapshot->tail.internal, snapshot->tail.memory);
1875 	drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
1876 	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
1877 	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
1878 	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
1879 	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
1880 
1881 	if (!snapshot->lrc_snapshot)
1882 		return;
1883 
1884 	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
1885 	drm_puts(p, "\t[HWSP].data: ");
1886 	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
1887 		u32 *val = snapshot->lrc_snapshot + i;
1888 		char dumped[ASCII85_BUFSZ];
1889 
1890 		drm_puts(p, ascii85_encode(*val, dumped));
1891 	}
1892 
1893 	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
1894 	drm_puts(p, "\t[HWCTX].data: ");
1895 	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
1896 		u32 *val = snapshot->lrc_snapshot + i;
1897 		char dumped[ASCII85_BUFSZ];
1898 
1899 		drm_puts(p, ascii85_encode(*val, dumped));
1900 	}
1901 	drm_puts(p, "\n");
1902 }
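
/*
 * Illustrative sketch (hypothetical userspace helper, not kernel code):
 * reversing the ascii85_encode() stream printed above. A zero dword is
 * emitted as the single character 'z'; any other dword becomes five
 * base-85 digits offset from '!', most significant digit first:
 *
 *	static uint32_t ascii85_decode_one(const char *s)
 *	{
 *		uint32_t v = 0;
 *
 *		if (*s == 'z')
 *			return 0;
 *		for (int i = 0; i < 5; i++)
 *			v = v * 85 + (s[i] - '!');
 *		return v;
 *	}
 */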
1903 
1904 void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
1905 {
1906 	if (!snapshot)
1907 		return;
1908 
1909 	kvfree(snapshot->lrc_snapshot);
1910 	if (snapshot->lrc_bo)
1911 		xe_bo_put(snapshot->lrc_bo);
1912 
1913 	kfree(snapshot);
1914 }
1915 
1916 static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
1917 {
1918 	u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
1919 	u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
1920 	struct xe_hw_engine *hwe;
1921 	u64 val;
1922 
1923 	hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
1924 	if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
1925 			    "Unexpected engine class:instance %d:%d for context utilization\n",
1926 			    class, instance))
1927 		return -1;
1928 
1929 	if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
1930 		val = xe_mmio_read64_2x32(&hwe->gt->mmio,
1931 					  RING_CTX_TIMESTAMP(hwe->mmio_base));
1932 	else
1933 		val = xe_mmio_read32(&hwe->gt->mmio,
1934 				     RING_CTX_TIMESTAMP(hwe->mmio_base));
1935 
1936 	*reg_ctx_ts = val;
1937 
1938 	return 0;
1939 }
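
/*
 * Illustrative sketch (simplified; not the actual xe_mmio implementation):
 * a 64-bit timestamp exposed as two 32-bit registers cannot be read
 * atomically, so a helper like xe_mmio_read64_2x32() typically re-reads
 * the high half until it is stable across the low-half read:
 *
 *	do {
 *		hi = read32(reg + 4);
 *		lo = read32(reg);
 *	} while (hi != read32(reg + 4));
 *	val = ((u64)hi << 32) | lo;
 */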
1940 
1941 /**
1942  * xe_lrc_update_timestamp() - Update ctx timestamp
1943  * @lrc: Pointer to the lrc.
1944  * @old_ts: Old timestamp value
1945  *
1946  * Populate @old_ts with the current saved ctx timestamp, read the new ctx
1947  * timestamp and update the saved value. With support for active contexts, the
1948  * calculation may be slightly racy, so follow read-again logic to ensure that
1949  * the context is still active before returning the timestamp.
1950  *
1951  * Returns: New ctx timestamp value
1952  */
1953 u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
1954 {
1955 	u64 lrc_ts, reg_ts;
1956 	u32 engine_id;
1957 
1958 	*old_ts = lrc->ctx_timestamp;
1959 
1960 	lrc_ts = xe_lrc_ctx_timestamp(lrc);
1961 	/* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
1962 	if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
1963 		lrc->ctx_timestamp = lrc_ts;
1964 		goto done;
1965 	}
1966 
1967 	if (lrc_ts == CONTEXT_ACTIVE) {
1968 		engine_id = xe_lrc_engine_id(lrc);
1969 		if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
1970 			lrc->ctx_timestamp = reg_ts;
1971 
1972 		/* read lrc again to ensure context is still active */
1973 		lrc_ts = xe_lrc_ctx_timestamp(lrc);
1974 	}
1975 
1976 	/*
1977 	 * If the context switched out during the reads above, just use the
1978 	 * lrc_ts. Note that this needs to be a separate if condition.
1979 	 */
1980 	if (lrc_ts != CONTEXT_ACTIVE)
1981 		lrc->ctx_timestamp = lrc_ts;
1982 
1983 done:
1984 	trace_xe_lrc_update_timestamp(lrc, *old_ts);
1985 
1986 	return lrc->ctx_timestamp;
1987 }
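
/*
 * A minimal usage sketch (assumed caller, not taken from this file):
 * utilization accounting accumulates the delta between successive calls,
 * relying on @old_ts returning the previously saved value:
 *
 *	u64 old_ts, new_ts;
 *
 *	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
 *	busy_ticks += new_ts - old_ts;
 */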
1988 
1989 /**
1990  * xe_lrc_ring_is_idle() - LRC is idle
1991  * @lrc: Pointer to the lrc.
1992  *
1993  * Compare the LRC ring head and tail to determine whether the ring is idle.
1994  *
1995  * Return: True if the ring is idle, False otherwise
1996  */
1997 bool xe_lrc_ring_is_idle(struct xe_lrc *lrc)
1998 {
1999 	return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc);
2000 }
2001