// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"
#include "xe_wa.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

struct xe_lrc_snapshot {
	struct xe_bo *lrc_bo;
	void *lrc_snapshot;
	unsigned long lrc_size, lrc_offset;

	u32 context_desc;
	u32 indirect_context_desc;
	u32 head;
	struct {
		u32 internal;
		u32 memory;
	} tail;
	u32 start_seqno;
	u32 seqno;
	u32 ctx_timestamp;
	u32 ctx_job_timestamp;
};

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size = 4 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			size = 3 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size = 2 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allow setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to set values for in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command, "count"
 * registers at a time. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for offsets
 * bigger than that. Those macros already set all the bits documented below
 * correctly:
 *
 * [7]: when a register offset needs more than 6 bits, use additional bytes
 *      that follow for the lower bits
 * [6:0]: register offset, without considering the engine base
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}
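
/*
 * Illustrative decode example (derived purely from the encoding rules above,
 * not from any hardware documentation): the table fragment
 *
 *	LRI(2, POSTED), REG(0x034), REG16(0x2b4)
 *
 * encodes as the bytes { 0x42, 0x0d, 0x81, 0x2d }, and set_offsets() would
 * turn it into:
 *
 *	regs[0] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
 *		  MI_LRI_FORCE_POSTED | MI_LRI_LRM_CS_MMIO;
 *	regs[1] = base + 0x034;		(value dword regs[2] left untouched)
 *	regs[3] = base + 0x2b4;		(value dword regs[4] left untouched)
 */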

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(5, POSTED),		/* [0x01] */
	REG(0x034),		/* [0x02] RING_BUFFER_HEAD */
	REG(0x030),		/* [0x04] RING_BUFFER_TAIL */
	REG(0x038),		/* [0x06] RING_BUFFER_START */
	REG(0x048),		/* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),			/* [0x0c] */
	LRI(9, POSTED),		/* [0x11] */
	REG(0x168),		/* [0x12] BB_ADDR_UDW */
	REG(0x140),		/* [0x14] BB_ADDR */
	REG(0x110),		/* [0x16] BB_STATE */
	REG16(0x588),		/* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x22] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x24] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x26] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x28] BB_STACK_WRITE_PORT */

	NOP(12),		/* [0x00] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

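/*
 * MI_MODE (whose register-state dword index lrc_ring_mi_mode() returns) is a
 * masked register: the upper 16 bits of a write select which of the lower 16
 * bits take effect. Clearing STOP_RING therefore needs the mask bit
 * (STOP_RING << 16) set while the value bit stays clear, which is what the
 * two statements below do (regs[x] holds the register offset, regs[x + 1]
 * the value written by the LRI).
 */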
static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K
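
/*
 * A rough map of the LRC BO layout, as implied by the offset helpers below
 * (the ring size is the caller's choice, and the indirect ring state page
 * only exists on platforms that support it):
 *
 *	+---------------------------------------+  0
 *	| ring buffer                            |
 *	+---------------------------------------+  __xe_lrc_pphwsp_offset()
 *	| PPHWSP (seqnos, timestamps, parallel)  |
 *	+---------------------------------------+  + LRC_PPHWSP_SIZE
 *	| engine register state                  |
 *	|   ...                                  |
 *	+---------------------------------------+  lrc->size - LRC_INDIRECT_RING_STATE_SIZE
 *	| indirect ring state (optional)         |
 *	+---------------------------------------+  lrc->size
 */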

u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* The ctx job timestamp is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel scratch area is in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

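/*
 * For each LRC element, generate an iosys_map accessor and a GGTT-address
 * accessor on top of the corresponding offset helper above. As an
 * illustration, DECL_MAP_ADDR_HELPERS(seqno) expands to __xe_lrc_seqno_map()
 * and __xe_lrc_seqno_ggtt_addr(), both built on __xe_lrc_seqno_offset().
 */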
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: per-process HW status page (PPHWSP) */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

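/*
 * Dword offsets of the ASID and access-counter-threshold value slots in the
 * register state, matching the CS_CTX_ASID ([0x2e]) and CS_ACC_CTR_THOLD
 * ([0x2a]) entries in the offset tables above; the "+ 1" selects the value
 * dword that follows each register-offset dword in the LRI pair.
 */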
#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Initialize the per-process HW status page (PPHWSP) and the LRC /
	 * context state to known values.
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/*
	 * While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return: pointer to the created LRC on success or an error pointer
 * on failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

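/*
 * Bytes available for writing between the software tail and the hardware
 * head. The "- 1 ... + 1" arithmetic makes the empty case (head == tail)
 * report the full ring size rather than zero. A worked example with assumed
 * values (not real hardware state): size = 4096, head = 0, tail = 16 gives
 * ((0 - 16 - 1) & 4095) + 1 = 4080 bytes free.
 */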
u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

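/*
 * Copy @size bytes of commands into the ring. A write that would run past
 * the end of the ring buffer is split in two around the wrap point, and a
 * dword-aligned (but not qword-aligned) write is padded with an MI_NOOP so
 * that the tail always advances to the next qword boundary.
 */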
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

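/*
 * The 64-bit context descriptor: the flag bits prepared in xe_lrc_init()
 * (LRC_VALID, the legacy 64b addressing mode, LRC_PRIVILEGE for PPGTT mode
 * and, on pre-12.50 platforms, the engine class/instance fields) OR'd with
 * the GGTT address of the PPHWSP.
 */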
u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords. We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_CONSTANT_PS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * Wa_14019789679
	 *
	 * If the driver doesn't explicitly emit the SVG instructions while
	 * setting up the default LRC, the context switch will write 0's
	 * (noops) into the LRC memory rather than the expected instruction
	 * headers. Application contexts start out as a copy of the default
	 * LRC, and if they also do not emit specific settings for some SVG
	 * state, then on context restore they'll unintentionally inherit
	 * whatever state setting the previous context had programmed into the
	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
	 * prevent the hardware from resetting that state back to any specific
	 * value).
	 *
	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
	 * since that's a specific state setting that can easily cause GPU
	 * hangs if unintentionally inherited. However to be safe we'll
	 * continue to emit all of the SVG state since it's best not to leak
	 * any of the state between contexts, even if that leakage is harmless.
	 */
	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
	}

	if (!state_table) {
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

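		/*
		 * Matching instr_dw() above, a multi-dword GFXPIPE
		 * instruction stores "total dwords - 2" in the low bits of
		 * its header. Only the header dword is written here; bb->len
		 * is advanced past the body dwords, which are left at their
		 * existing (unwritten) contents.
		 */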
		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}

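/*
 * Snapshot capture is split in two: this function grabs the cheap state
 * (using GFP_NOWAIT, presumably because it can be called from contexts where
 * blocking allocations are not allowed), while the expensive copy of the LRC
 * BO contents is deferred to xe_lrc_snapshot_capture_delayed().
 */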
struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	if (lrc->bo->vm)
		xe_vm_get(lrc->bo->vm);

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct xe_vm *vm;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	vm = bo->vm;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
	if (vm)
		xe_vm_put(vm);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo) {
		struct xe_vm *vm;

		vm = snapshot->lrc_bo->vm;
		xe_bo_put(snapshot->lrc_bo);
		if (vm)
			xe_vm_put(vm);
	}
	kfree(snapshot);
}

/**
 * xe_lrc_update_timestamp() - Update ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Old timestamp value
 *
 * Populate @old_ts with the current saved ctx timestamp, then read the new
 * ctx timestamp and update the saved value.
 *
 * Returns: New ctx timestamp value
 */
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
{
	*old_ts = lrc->ctx_timestamp;

	lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);

	return lrc->ctx_timestamp;
}