1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_lrc.h"
7
8 #include <generated/xe_wa_oob.h>
9
10 #include <linux/ascii85.h>
11
12 #include "instructions/xe_mi_commands.h"
13 #include "instructions/xe_gfxpipe_commands.h"
14 #include "instructions/xe_gfx_state_commands.h"
15 #include "regs/xe_engine_regs.h"
16 #include "regs/xe_lrc_layout.h"
17 #include "xe_bb.h"
18 #include "xe_bo.h"
19 #include "xe_device.h"
20 #include "xe_drm_client.h"
21 #include "xe_exec_queue_types.h"
22 #include "xe_gt.h"
23 #include "xe_gt_printk.h"
24 #include "xe_hw_fence.h"
25 #include "xe_map.h"
26 #include "xe_memirq.h"
27 #include "xe_sriov.h"
28 #include "xe_vm.h"
29 #include "xe_wa.h"
30
31 #define LRC_VALID BIT_ULL(0)
32 #define LRC_PRIVILEGE BIT_ULL(8)
33 #define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3)
34 #define LRC_LEGACY_64B_CONTEXT 3
35
36 #define LRC_ENGINE_CLASS GENMASK_ULL(63, 61)
37 #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
38
39 #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
40
41 static struct xe_device *
42 lrc_to_xe(struct xe_lrc *lrc)
43 {
44 return gt_to_xe(lrc->fence_ctx.gt);
45 }
46
47 size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
48 {
49 struct xe_device *xe = gt_to_xe(gt);
50 size_t size;
51
52 switch (class) {
53 case XE_ENGINE_CLASS_RENDER:
54 if (GRAPHICS_VER(xe) >= 20)
55 size = 4 * SZ_4K;
56 else
57 size = 14 * SZ_4K;
58 break;
59 case XE_ENGINE_CLASS_COMPUTE:
60 /* 14 pages since graphics_ver == 11 */
61 if (GRAPHICS_VER(xe) >= 20)
62 size = 3 * SZ_4K;
63 else
64 size = 14 * SZ_4K;
65 break;
66 default:
67 WARN(1, "Unknown engine class: %d", class);
68 fallthrough;
69 case XE_ENGINE_CLASS_COPY:
70 case XE_ENGINE_CLASS_VIDEO_DECODE:
71 case XE_ENGINE_CLASS_VIDEO_ENHANCE:
72 case XE_ENGINE_CLASS_OTHER:
73 size = 2 * SZ_4K;
74 }
75
76 /* Add indirect ring state page */
77 if (xe_gt_has_indirect_ring_state(gt))
78 size += LRC_INDIRECT_RING_STATE_SIZE;
79
80 return size;
81 }
82
83 /*
84 * The per-platform tables are u8-encoded in @data. Decode @data and set the
85 * addresses' offset and commands in @regs. The following encoding is used
86 * for each byte. There are 2 steps: decoding commands and decoding addresses.
87 *
88 * Commands:
89 * [7]: create NOPs - the number of NOPs is set in the lower bits
90 * [6]: When creating an MI_LOAD_REGISTER_IMM command, allows setting
91 * MI_LRI_FORCE_POSTED
92 * [5:0]: Number of NOPs or registers to set values to in case of
93 * MI_LOAD_REGISTER_IMM
94 *
95 * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command, one per
96 * register in its "count" field. They are encoded with the REG/REG16 macros: the former
97 * is used for offsets smaller than 0x200 while the latter is for values bigger
98 * than that. Those macros already set all the bits documented below correctly:
99 *
100 * [7]: When a register offset needs more than 6 bits, additional bytes
101 * follow for the lower bits
102 * [6:0]: Register offset, without considering the engine base.
103 *
104 * This function only tweaks the commands and register offsets. Values are not
105 * filled out.
106 */
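/*
 * Worked example of the encoding above, derived from the REG/REG16 macros
 * defined below (illustrative only): REG(0x034) emits the single byte 0x0d
 * (0x034 >> 2), while REG16(0x2b4) emits two bytes, 0x81 (bit 7 set plus
 * 0x2b4 >> 9) followed by 0x2d ((0x2b4 >> 2) & 0x7f). The decode loop in
 * set_offsets() rebuilds the offset as ((0x01 << 7) | 0x2d) << 2 == 0x2b4
 * and adds the engine's mmio_base.
 */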
107 static void set_offsets(u32 *regs,
108 const u8 *data,
109 const struct xe_hw_engine *hwe)
110 #define NOP(x) (BIT(7) | (x))
111 #define LRI(count, flags) ((flags) << 6 | (count) | \
112 BUILD_BUG_ON_ZERO(count >= BIT(6)))
113 #define POSTED BIT(0)
114 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
115 #define REG16(x) \
116 (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
117 (((x) >> 2) & 0x7f)
118 {
119 const u32 base = hwe->mmio_base;
120
121 while (*data) {
122 u8 count, flags;
123
124 if (*data & BIT(7)) { /* skip */
125 count = *data++ & ~BIT(7);
126 regs += count;
127 continue;
128 }
129
130 count = *data & 0x3f;
131 flags = *data >> 6;
132 data++;
133
134 *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
135 if (flags & POSTED)
136 *regs |= MI_LRI_FORCE_POSTED;
137 *regs |= MI_LRI_LRM_CS_MMIO;
138 regs++;
139
140 xe_gt_assert(hwe->gt, count);
141 do {
142 u32 offset = 0;
143 u8 v;
144
145 do {
146 v = *data++;
147 offset <<= 7;
148 offset |= v & ~BIT(7);
149 } while (v & BIT(7));
150
151 regs[0] = base + (offset << 2);
152 regs += 2;
153 } while (--count);
154 }
155
156 *regs = MI_BATCH_BUFFER_END | BIT(0);
157 }
158
159 static const u8 gen12_xcs_offsets[] = {
160 NOP(1),
161 LRI(13, POSTED),
162 REG16(0x244),
163 REG(0x034),
164 REG(0x030),
165 REG(0x038),
166 REG(0x03c),
167 REG(0x168),
168 REG(0x140),
169 REG(0x110),
170 REG(0x1c0),
171 REG(0x1c4),
172 REG(0x1c8),
173 REG(0x180),
174 REG16(0x2b4),
175
176 NOP(5),
177 LRI(9, POSTED),
178 REG16(0x3a8),
179 REG16(0x28c),
180 REG16(0x288),
181 REG16(0x284),
182 REG16(0x280),
183 REG16(0x27c),
184 REG16(0x278),
185 REG16(0x274),
186 REG16(0x270),
187
188 0
189 };
190
191 static const u8 dg2_xcs_offsets[] = {
192 NOP(1),
193 LRI(15, POSTED),
194 REG16(0x244),
195 REG(0x034),
196 REG(0x030),
197 REG(0x038),
198 REG(0x03c),
199 REG(0x168),
200 REG(0x140),
201 REG(0x110),
202 REG(0x1c0),
203 REG(0x1c4),
204 REG(0x1c8),
205 REG(0x180),
206 REG16(0x2b4),
207 REG(0x120),
208 REG(0x124),
209
210 NOP(1),
211 LRI(9, POSTED),
212 REG16(0x3a8),
213 REG16(0x28c),
214 REG16(0x288),
215 REG16(0x284),
216 REG16(0x280),
217 REG16(0x27c),
218 REG16(0x278),
219 REG16(0x274),
220 REG16(0x270),
221
222 0
223 };
224
225 static const u8 gen12_rcs_offsets[] = {
226 NOP(1),
227 LRI(13, POSTED),
228 REG16(0x244),
229 REG(0x034),
230 REG(0x030),
231 REG(0x038),
232 REG(0x03c),
233 REG(0x168),
234 REG(0x140),
235 REG(0x110),
236 REG(0x1c0),
237 REG(0x1c4),
238 REG(0x1c8),
239 REG(0x180),
240 REG16(0x2b4),
241
242 NOP(5),
243 LRI(9, POSTED),
244 REG16(0x3a8),
245 REG16(0x28c),
246 REG16(0x288),
247 REG16(0x284),
248 REG16(0x280),
249 REG16(0x27c),
250 REG16(0x278),
251 REG16(0x274),
252 REG16(0x270),
253
254 LRI(3, POSTED),
255 REG(0x1b0),
256 REG16(0x5a8),
257 REG16(0x5ac),
258
259 NOP(6),
260 LRI(1, 0),
261 REG(0x0c8),
262 NOP(3 + 9 + 1),
263
264 LRI(51, POSTED),
265 REG16(0x588),
266 REG16(0x588),
267 REG16(0x588),
268 REG16(0x588),
269 REG16(0x588),
270 REG16(0x588),
271 REG(0x028),
272 REG(0x09c),
273 REG(0x0c0),
274 REG(0x178),
275 REG(0x17c),
276 REG16(0x358),
277 REG(0x170),
278 REG(0x150),
279 REG(0x154),
280 REG(0x158),
281 REG16(0x41c),
282 REG16(0x600),
283 REG16(0x604),
284 REG16(0x608),
285 REG16(0x60c),
286 REG16(0x610),
287 REG16(0x614),
288 REG16(0x618),
289 REG16(0x61c),
290 REG16(0x620),
291 REG16(0x624),
292 REG16(0x628),
293 REG16(0x62c),
294 REG16(0x630),
295 REG16(0x634),
296 REG16(0x638),
297 REG16(0x63c),
298 REG16(0x640),
299 REG16(0x644),
300 REG16(0x648),
301 REG16(0x64c),
302 REG16(0x650),
303 REG16(0x654),
304 REG16(0x658),
305 REG16(0x65c),
306 REG16(0x660),
307 REG16(0x664),
308 REG16(0x668),
309 REG16(0x66c),
310 REG16(0x670),
311 REG16(0x674),
312 REG16(0x678),
313 REG16(0x67c),
314 REG(0x068),
315 REG(0x084),
316 NOP(1),
317
318 0
319 };
320
321 static const u8 xehp_rcs_offsets[] = {
322 NOP(1),
323 LRI(13, POSTED),
324 REG16(0x244),
325 REG(0x034),
326 REG(0x030),
327 REG(0x038),
328 REG(0x03c),
329 REG(0x168),
330 REG(0x140),
331 REG(0x110),
332 REG(0x1c0),
333 REG(0x1c4),
334 REG(0x1c8),
335 REG(0x180),
336 REG16(0x2b4),
337
338 NOP(5),
339 LRI(9, POSTED),
340 REG16(0x3a8),
341 REG16(0x28c),
342 REG16(0x288),
343 REG16(0x284),
344 REG16(0x280),
345 REG16(0x27c),
346 REG16(0x278),
347 REG16(0x274),
348 REG16(0x270),
349
350 LRI(3, POSTED),
351 REG(0x1b0),
352 REG16(0x5a8),
353 REG16(0x5ac),
354
355 NOP(6),
356 LRI(1, 0),
357 REG(0x0c8),
358
359 0
360 };
361
362 static const u8 dg2_rcs_offsets[] = {
363 NOP(1),
364 LRI(15, POSTED),
365 REG16(0x244),
366 REG(0x034),
367 REG(0x030),
368 REG(0x038),
369 REG(0x03c),
370 REG(0x168),
371 REG(0x140),
372 REG(0x110),
373 REG(0x1c0),
374 REG(0x1c4),
375 REG(0x1c8),
376 REG(0x180),
377 REG16(0x2b4),
378 REG(0x120),
379 REG(0x124),
380
381 NOP(1),
382 LRI(9, POSTED),
383 REG16(0x3a8),
384 REG16(0x28c),
385 REG16(0x288),
386 REG16(0x284),
387 REG16(0x280),
388 REG16(0x27c),
389 REG16(0x278),
390 REG16(0x274),
391 REG16(0x270),
392
393 LRI(3, POSTED),
394 REG(0x1b0),
395 REG16(0x5a8),
396 REG16(0x5ac),
397
398 NOP(6),
399 LRI(1, 0),
400 REG(0x0c8),
401
402 0
403 };
404
405 static const u8 mtl_rcs_offsets[] = {
406 NOP(1),
407 LRI(15, POSTED),
408 REG16(0x244),
409 REG(0x034),
410 REG(0x030),
411 REG(0x038),
412 REG(0x03c),
413 REG(0x168),
414 REG(0x140),
415 REG(0x110),
416 REG(0x1c0),
417 REG(0x1c4),
418 REG(0x1c8),
419 REG(0x180),
420 REG16(0x2b4),
421 REG(0x120),
422 REG(0x124),
423
424 NOP(1),
425 LRI(9, POSTED),
426 REG16(0x3a8),
427 REG16(0x28c),
428 REG16(0x288),
429 REG16(0x284),
430 REG16(0x280),
431 REG16(0x27c),
432 REG16(0x278),
433 REG16(0x274),
434 REG16(0x270),
435
436 NOP(2),
437 LRI(2, POSTED),
438 REG16(0x5a8),
439 REG16(0x5ac),
440
441 NOP(6),
442 LRI(1, 0),
443 REG(0x0c8),
444
445 0
446 };
447
448 #define XE2_CTX_COMMON \
449 NOP(1), /* [0x00] */ \
450 LRI(15, POSTED), /* [0x01] */ \
451 REG16(0x244), /* [0x02] CTXT_SR_CTL */ \
452 REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \
453 REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \
454 REG(0x038), /* [0x08] RING_BUFFER_START */ \
455 REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \
456 REG(0x168), /* [0x0c] BB_ADDR_UDW */ \
457 REG(0x140), /* [0x0e] BB_ADDR */ \
458 REG(0x110), /* [0x10] BB_STATE */ \
459 REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \
460 REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \
461 REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
462 REG(0x180), /* [0x18] CCID */ \
463 REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \
464 REG(0x120), /* [0x1c] PRT_BB_STATE */ \
465 REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \
466 \
467 NOP(1), /* [0x20] */ \
468 LRI(9, POSTED), /* [0x21] */ \
469 REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \
470 REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \
471 REG(0x108), /* [0x26] INDIRECT_RING_STATE */ \
472 REG16(0x284), /* [0x28] dummy reg */ \
473 REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \
474 REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \
475 REG16(0x278), /* [0x2e] CS_CTX_ASID */ \
476 REG16(0x274), /* [0x30] PTBP_UDW */ \
477 REG16(0x270) /* [0x32] PTBP_LDW */
478
479 static const u8 xe2_rcs_offsets[] = {
480 XE2_CTX_COMMON,
481
482 NOP(2), /* [0x34] */
483 LRI(2, POSTED), /* [0x36] */
484 REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
485 REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */
486
487 NOP(6), /* [0x41] */
488 LRI(1, 0), /* [0x47] */
489 REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */
490
491 0
492 };
493
494 static const u8 xe2_bcs_offsets[] = {
495 XE2_CTX_COMMON,
496
497 NOP(4 + 8 + 1), /* [0x34] */
498 LRI(2, POSTED), /* [0x41] */
499 REG16(0x200), /* [0x42] BCS_SWCTRL */
500 REG16(0x204), /* [0x44] BLIT_CCTL */
501
502 0
503 };
504
505 static const u8 xe2_xcs_offsets[] = {
506 XE2_CTX_COMMON,
507
508 0
509 };
510
511 static const u8 xe2_indirect_ring_state_offsets[] = {
512 NOP(1), /* [0x00] */
513 LRI(5, POSTED), /* [0x01] */
514 REG(0x034), /* [0x02] RING_BUFFER_HEAD */
515 REG(0x030), /* [0x04] RING_BUFFER_TAIL */
516 REG(0x038), /* [0x06] RING_BUFFER_START */
517 REG(0x048), /* [0x08] RING_BUFFER_START_UDW */
518 REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */
519
520 NOP(5), /* [0x0c] */
521 LRI(9, POSTED), /* [0x11] */
522 REG(0x168), /* [0x12] BB_ADDR_UDW */
523 REG(0x140), /* [0x14] BB_ADDR */
524 REG(0x110), /* [0x16] BB_STATE */
525 REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */
526 REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */
527 REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */
528 REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */
529 REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */
530 REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */
531
532 NOP(12), /* [0x00] */
533
534 0
535 };
536
537 #undef REG16
538 #undef REG
539 #undef LRI
540 #undef NOP
541
542 static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
543 {
544 if (class == XE_ENGINE_CLASS_RENDER) {
545 if (GRAPHICS_VER(xe) >= 20)
546 return xe2_rcs_offsets;
547 else if (GRAPHICS_VERx100(xe) >= 1270)
548 return mtl_rcs_offsets;
549 else if (GRAPHICS_VERx100(xe) >= 1255)
550 return dg2_rcs_offsets;
551 else if (GRAPHICS_VERx100(xe) >= 1250)
552 return xehp_rcs_offsets;
553 else
554 return gen12_rcs_offsets;
555 } else if (class == XE_ENGINE_CLASS_COPY) {
556 if (GRAPHICS_VER(xe) >= 20)
557 return xe2_bcs_offsets;
558 else
559 return gen12_xcs_offsets;
560 } else {
561 if (GRAPHICS_VER(xe) >= 20)
562 return xe2_xcs_offsets;
563 else if (GRAPHICS_VERx100(xe) >= 1255)
564 return dg2_xcs_offsets;
565 else
566 return gen12_xcs_offsets;
567 }
568 }
569
570 static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
571 {
572 regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
573 CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
574
575 if (xe_gt_has_indirect_ring_state(hwe->gt))
576 regs[CTX_CONTEXT_CONTROL] |=
577 _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
578
579 /* TODO: Timestamp */
580 }
581
582 static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
583 {
584 struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
585 struct xe_device *xe = gt_to_xe(hwe->gt);
586
587 if (!xe_device_uses_memirq(xe))
588 return;
589
590 regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
591 MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
592 regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
593 regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
594
595 regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
596 MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
597 regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
598 regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
599 regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
600 regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
601 }
602
603 static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
604 {
605 struct xe_device *xe = gt_to_xe(hwe->gt);
606
607 if (GRAPHICS_VERx100(xe) >= 1250)
608 return 0x70;
609 else
610 return 0x60;
611 }
612
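/*
 * RING_MI_MODE is a masked register: the upper 16 bits of the value written
 * select which of the lower 16 bits take effect. reset_stop_ring() writes
 * STOP_RING << 16 with the STOP_RING bit itself cleared, so restoring this
 * context clears STOP_RING and the ring is not left stopped after a reset.
 */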
613 static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
614 {
615 int x;
616
617 x = lrc_ring_mi_mode(hwe);
618 regs[x + 1] &= ~STOP_RING;
619 regs[x + 1] |= STOP_RING << 16;
620 }
621
622 static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
623 {
624 return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
625 }
626
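/*
 * Layout of the LRC buffer object, as implied by the offset helpers below:
 *
 *   [0 .. ring.size)               ring buffer
 *   [ring.size .. +4K)             per-process HW status page (PPHWSP)
 *   [PPHWSP end .. )               context register state
 *   [lrc->size - 4K .. lrc->size)  indirect ring state page (when supported)
 */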
627 static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
628 {
629 return 0;
630 }
631
632 u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
633 {
634 return lrc->ring.size;
635 }
636
637 /* Make the magic macros work */
638 #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
639 #define __xe_lrc_regs_offset xe_lrc_regs_offset
640
641 #define LRC_SEQNO_PPHWSP_OFFSET 512
642 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
643 #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
644 #define LRC_PARALLEL_PPHWSP_OFFSET 2048
645 #define LRC_PPHWSP_SIZE SZ_4K
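/*
 * Driver-defined scratch locations inside the 4K PPHWSP, per the defines
 * above: the fence seqno at byte 512, the start seqno 8 bytes after it, the
 * job timestamp 8 bytes after that, and a parallel-submission scratch area
 * at byte 2048.
 */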
646
647 u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
648 {
649 return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
650 }
651
652 static size_t lrc_reg_size(struct xe_device *xe)
653 {
654 if (GRAPHICS_VERx100(xe) >= 1250)
655 return 96 * sizeof(u32);
656 else
657 return 80 * sizeof(u32);
658 }
659
660 size_t xe_lrc_skip_size(struct xe_device *xe)
661 {
662 return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
663 }
664
665 static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
666 {
667 /* The seqno is stored in the driver-defined portion of PPHWSP */
668 return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
669 }
670
671 static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
672 {
673 /* The start seqno is stored in the driver-defined portion of PPHWSP */
674 return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
675 }
676
677 static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
678 {
679 /* The job timestamp is stored in the driver-defined portion of PPHWSP */
680 return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
681 }
682
683 static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
684 {
685 /* The parallel is stored in the driver-defined portion of PPHWSP */
686 return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
687 }
688
689 static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
690 {
691 return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
692 }
693
694 static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
695 {
696 /* Indirect ring state page is at the very end of LRC */
697 return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
698 }
699
700 #define DECL_MAP_ADDR_HELPERS(elem) \
701 static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
702 { \
703 struct iosys_map map = lrc->bo->vmap; \
704 \
705 xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
706 iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
707 return map; \
708 } \
709 static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
710 { \
711 return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
712 } \
713
714 DECL_MAP_ADDR_HELPERS(ring)
715 DECL_MAP_ADDR_HELPERS(pphwsp)
716 DECL_MAP_ADDR_HELPERS(seqno)
717 DECL_MAP_ADDR_HELPERS(regs)
718 DECL_MAP_ADDR_HELPERS(start_seqno)
719 DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
720 DECL_MAP_ADDR_HELPERS(ctx_timestamp)
721 DECL_MAP_ADDR_HELPERS(parallel)
722 DECL_MAP_ADDR_HELPERS(indirect_ring)
723
724 #undef DECL_MAP_ADDR_HELPERS
725
726 /**
727 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
728 * @lrc: Pointer to the lrc.
729 *
730 * Returns: ctx timestamp GGTT address
731 */
732 u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
733 {
734 return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
735 }
736
737 /**
738 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
739 * @lrc: Pointer to the lrc.
740 *
741 * Returns: ctx timestamp value
742 */
743 u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
744 {
745 struct xe_device *xe = lrc_to_xe(lrc);
746 struct iosys_map map;
747
748 map = __xe_lrc_ctx_timestamp_map(lrc);
749 return xe_map_read32(xe, &map);
750 }
751
752 /**
753 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
754 * @lrc: Pointer to the lrc.
755 *
756 * Returns: ctx job timestamp GGTT address
757 */
758 u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
759 {
760 return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
761 }
762
763 /**
764 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
765 * @lrc: Pointer to the lrc.
766 *
767 * Returns: ctx job timestamp value
768 */
769 u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
770 {
771 struct xe_device *xe = lrc_to_xe(lrc);
772 struct iosys_map map;
773
774 map = __xe_lrc_ctx_job_timestamp_map(lrc);
775 return xe_map_read32(xe, &map);
776 }
777
778 u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
779 {
780 return __xe_lrc_pphwsp_ggtt_addr(lrc);
781 }
782
783 u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
784 {
785 if (!xe_lrc_has_indirect_ring_state(lrc))
786 return 0;
787
788 return __xe_lrc_indirect_ring_ggtt_addr(lrc);
789 }
790
791 static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
792 {
793 struct xe_device *xe = lrc_to_xe(lrc);
794 struct iosys_map map;
795
796 map = __xe_lrc_indirect_ring_map(lrc);
797 iosys_map_incr(&map, reg_nr * sizeof(u32));
798 return xe_map_read32(xe, &map);
799 }
800
801 static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
802 int reg_nr, u32 val)
803 {
804 struct xe_device *xe = lrc_to_xe(lrc);
805 struct iosys_map map;
806
807 map = __xe_lrc_indirect_ring_map(lrc);
808 iosys_map_incr(&map, reg_nr * sizeof(u32));
809 xe_map_write32(xe, &map, val);
810 }
811
812 u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
813 {
814 struct xe_device *xe = lrc_to_xe(lrc);
815 struct iosys_map map;
816
817 map = __xe_lrc_regs_map(lrc);
818 iosys_map_incr(&map, reg_nr * sizeof(u32));
819 return xe_map_read32(xe, &map);
820 }
821
822 void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
823 {
824 struct xe_device *xe = lrc_to_xe(lrc);
825 struct iosys_map map;
826
827 map = __xe_lrc_regs_map(lrc);
828 iosys_map_incr(&map, reg_nr * sizeof(u32));
829 xe_map_write32(xe, &map, val);
830 }
831
832 static void *empty_lrc_data(struct xe_hw_engine *hwe)
833 {
834 struct xe_gt *gt = hwe->gt;
835 void *data;
836 u32 *regs;
837
838 data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
839 if (!data)
840 return NULL;
841
842 /* 1st page: Per-Process HW Status Page (PPHWSP) */
843 regs = data + LRC_PPHWSP_SIZE;
844 set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
845 set_context_control(regs, hwe);
846 set_memory_based_intr(regs, hwe);
847 reset_stop_ring(regs, hwe);
848 if (xe_gt_has_indirect_ring_state(gt)) {
849 regs = data + xe_gt_lrc_size(gt, hwe->class) -
850 LRC_INDIRECT_RING_STATE_SIZE;
851 set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
852 }
853
854 return data;
855 }
856
857 static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
858 {
859 u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
860
861 xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
862 xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
863 }
864
865 static void xe_lrc_finish(struct xe_lrc *lrc)
866 {
867 xe_hw_fence_ctx_finish(&lrc->fence_ctx);
868 xe_bo_lock(lrc->bo, false);
869 xe_bo_unpin(lrc->bo);
870 xe_bo_unlock(lrc->bo);
871 xe_bo_put(lrc->bo);
872 }
873
874 #define PVC_CTX_ASID (0x2e + 1)
875 #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
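/*
 * These are dword indices into the context register state: the value slot
 * immediately following the register-offset slot annotated in the offset
 * tables above ([0x2e] CS_CTX_ASID and [0x2a] CS_ACC_CTR_THOLD), hence the
 * "+ 1".
 */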
876
877 static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
878 struct xe_vm *vm, u32 ring_size)
879 {
880 struct xe_gt *gt = hwe->gt;
881 struct xe_tile *tile = gt_to_tile(gt);
882 struct xe_device *xe = gt_to_xe(gt);
883 struct iosys_map map;
884 void *init_data = NULL;
885 u32 arb_enable;
886 u32 lrc_size;
887 int err;
888
889 kref_init(&lrc->refcount);
890 lrc->flags = 0;
891 lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
892 if (xe_gt_has_indirect_ring_state(gt))
893 lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
894
895 /*
896 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
897 * via VM bind calls.
898 */
899 lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
900 ttm_bo_type_kernel,
901 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
902 XE_BO_FLAG_GGTT |
903 XE_BO_FLAG_GGTT_INVALIDATE);
904 if (IS_ERR(lrc->bo))
905 return PTR_ERR(lrc->bo);
906
907 lrc->size = lrc_size;
908 lrc->tile = gt_to_tile(hwe->gt);
909 lrc->ring.size = ring_size;
910 lrc->ring.tail = 0;
911 lrc->ctx_timestamp = 0;
912
913 xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
914 hwe->fence_irq, hwe->name);
915
916 if (!gt->default_lrc[hwe->class]) {
917 init_data = empty_lrc_data(hwe);
918 if (!init_data) {
919 err = -ENOMEM;
920 goto err_lrc_finish;
921 }
922 }
923
924 /*
925 * Initialize the Per-Process HW Status Page (PPHWSP) and the LRC /
926 * context state to known values
927 */
928 map = __xe_lrc_pphwsp_map(lrc);
929 if (!init_data) {
930 xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
931 xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
932 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
933 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
934 } else {
935 xe_map_memcpy_to(xe, &map, 0, init_data,
936 xe_gt_lrc_size(gt, hwe->class));
937 kfree(init_data);
938 }
939
940 if (vm) {
941 xe_lrc_set_ppgtt(lrc, vm);
942
943 if (vm->xef)
944 xe_drm_client_add_bo(vm->xef->client, lrc->bo);
945 }
946
947 if (xe_gt_has_indirect_ring_state(gt)) {
948 xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
949 __xe_lrc_indirect_ring_ggtt_addr(lrc));
950
951 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
952 __xe_lrc_ring_ggtt_addr(lrc));
953 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
954 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
955 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
956 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
957 RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
958 } else {
959 xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
960 xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
961 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
962 xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
963 RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
964 }
965
966 xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
967
968 if (xe->info.has_asid && vm)
969 xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
970
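/*
 * Build the context descriptor from the LRC_* fields defined at the top of
 * this file: bit 0 marks the descriptor valid, bits 4:3 select the legacy
 * 64-bit addressing mode, bit 8 selects PPGTT, and (before GRAPHICS_VERx100
 * 1250) bits 63:61 and 53:48 carry the engine class and instance.
 * xe_lrc_descriptor() later ORs in the GGTT address of the PPHWSP.
 */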
971 lrc->desc = LRC_VALID;
972 lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
973 /* TODO: Priority */
974
975 /* While this appears to have something about privileged batches or
976 * some such, it really just means PPGTT mode.
977 */
978 if (vm)
979 lrc->desc |= LRC_PRIVILEGE;
980
981 if (GRAPHICS_VERx100(xe) < 1250) {
982 lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
983 lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
984 }
985
986 arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
987 xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
988
989 map = __xe_lrc_seqno_map(lrc);
990 xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
991
992 map = __xe_lrc_start_seqno_map(lrc);
993 xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
994
995 return 0;
996
997 err_lrc_finish:
998 xe_lrc_finish(lrc);
999 return err;
1000 }
1001
1002 /**
1003 * xe_lrc_create - Create a LRC
1004 * @hwe: Hardware Engine
1005 * @vm: The VM (address space)
1006 * @ring_size: LRC ring size
1007 *
1008 * Allocate and initialize the Logical Ring Context (LRC).
1009 *
1010 * Return: Pointer to the created LRC on success or an error pointer
1011 * on failure.
1012 */
1013 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
1014 u32 ring_size)
1015 {
1016 struct xe_lrc *lrc;
1017 int err;
1018
1019 lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
1020 if (!lrc)
1021 return ERR_PTR(-ENOMEM);
1022
1023 err = xe_lrc_init(lrc, hwe, vm, ring_size);
1024 if (err) {
1025 kfree(lrc);
1026 return ERR_PTR(err);
1027 }
1028
1029 return lrc;
1030 }
1031
1032 /**
1033 * xe_lrc_destroy - Destroy the LRC
1034 * @ref: reference to LRC
1035 *
1036 * Called when ref == 0, release resources held by the Logical Ring Context
1037 * (LRC) and free the LRC memory.
1038 */
1039 void xe_lrc_destroy(struct kref *ref)
1040 {
1041 struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);
1042
1043 xe_lrc_finish(lrc);
1044 kfree(lrc);
1045 }
1046
1047 void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
1048 {
1049 if (xe_lrc_has_indirect_ring_state(lrc))
1050 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
1051 else
1052 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
1053 }
1054
1055 u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
1056 {
1057 if (xe_lrc_has_indirect_ring_state(lrc))
1058 return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
1059 else
1060 return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
1061 }
1062
1063 void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
1064 {
1065 if (xe_lrc_has_indirect_ring_state(lrc))
1066 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
1067 else
1068 xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
1069 }
1070
1071 u32 xe_lrc_ring_head(struct xe_lrc *lrc)
1072 {
1073 if (xe_lrc_has_indirect_ring_state(lrc))
1074 return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
1075 else
1076 return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
1077 }
1078
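/*
 * Free space in the ring in bytes. The ring size is assumed to be a power of
 * two (the masking below relies on it), so ((head - tail - 1) & (size - 1)) + 1
 * yields a result in [1, size]: the full size when head == tail (empty ring),
 * and head - tail when the hardware head is ahead of the software tail.
 */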
1079 u32 xe_lrc_ring_space(struct xe_lrc *lrc)
1080 {
1081 const u32 head = xe_lrc_ring_head(lrc);
1082 const u32 tail = lrc->ring.tail;
1083 const u32 size = lrc->ring.size;
1084
1085 return ((head - tail - 1) & (size - 1)) + 1;
1086 }
1087
1088 static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
1089 const void *data, size_t size)
1090 {
1091 struct xe_device *xe = lrc_to_xe(lrc);
1092
1093 iosys_map_incr(&ring, lrc->ring.tail);
1094 xe_map_memcpy_to(xe, &ring, 0, data, size);
1095 lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
1096 }
1097
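/*
 * Copy command stream data into the ring, splitting the copy in two when it
 * would wrap past the end of the ring buffer. Sizes must be dword aligned;
 * when the payload is not qword aligned a single MI_NOOP is appended so the
 * tail always advances in 8-byte steps.
 */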
1098 void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
1099 {
1100 struct xe_device *xe = lrc_to_xe(lrc);
1101 struct iosys_map ring;
1102 u32 rhs;
1103 size_t aligned_size;
1104
1105 xe_assert(xe, IS_ALIGNED(size, 4));
1106 aligned_size = ALIGN(size, 8);
1107
1108 ring = __xe_lrc_ring_map(lrc);
1109
1110 xe_assert(xe, lrc->ring.tail < lrc->ring.size);
1111 rhs = lrc->ring.size - lrc->ring.tail;
1112 if (size > rhs) {
1113 __xe_lrc_write_ring(lrc, ring, data, rhs);
1114 __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
1115 } else {
1116 __xe_lrc_write_ring(lrc, ring, data, size);
1117 }
1118
1119 if (aligned_size > size) {
1120 u32 noop = MI_NOOP;
1121
1122 __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
1123 }
1124 }
1125
1126 u64 xe_lrc_descriptor(struct xe_lrc *lrc)
1127 {
1128 return lrc->desc | xe_lrc_ggtt_addr(lrc);
1129 }
1130
1131 u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
1132 {
1133 return __xe_lrc_seqno_ggtt_addr(lrc);
1134 }
1135
1136 /**
1137 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
1138 *
1139 * Allocate but don't initialize an lrc seqno fence.
1140 *
1141 * Return: Pointer to the allocated fence or
1142 * negative error pointer on error.
1143 */
1144 struct dma_fence *xe_lrc_alloc_seqno_fence(void)
1145 {
1146 return xe_hw_fence_alloc();
1147 }
1148
1149 /**
1150 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
1151 * @fence: Pointer to the fence to free.
1152 *
1153 * Frees an lrc seqno fence that hasn't yet been
1154 * initialized.
1155 */
1156 void xe_lrc_free_seqno_fence(struct dma_fence *fence)
1157 {
1158 xe_hw_fence_free(fence);
1159 }
1160
1161 /**
1162 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
1163 * @lrc: Pointer to the lrc.
1164 * @fence: Pointer to the fence to initialize.
1165 *
1166 * Initializes a pre-allocated lrc seqno fence.
1167 * After initialization, the fence is subject to normal
1168 * dma-fence refcounting.
1169 */
1170 void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
1171 {
1172 xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
1173 }
1174
1175 s32 xe_lrc_seqno(struct xe_lrc *lrc)
1176 {
1177 struct iosys_map map = __xe_lrc_seqno_map(lrc);
1178
1179 return xe_map_read32(lrc_to_xe(lrc), &map);
1180 }
1181
1182 s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
1183 {
1184 struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
1185
1186 return xe_map_read32(lrc_to_xe(lrc), &map);
1187 }
1188
1189 u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
1190 {
1191 return __xe_lrc_start_seqno_ggtt_addr(lrc);
1192 }
1193
1194 u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
1195 {
1196 return __xe_lrc_parallel_ggtt_addr(lrc);
1197 }
1198
1199 struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
1200 {
1201 return __xe_lrc_parallel_map(lrc);
1202 }
1203
1204 static int instr_dw(u32 cmd_header)
1205 {
1206 /* GFXPIPE "SINGLE_DW" opcodes are a single dword */
1207 if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
1208 GFXPIPE_SINGLE_DW_CMD(0, 0))
1209 return 1;
1210
1211 /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
1212 if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
1213 return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
1214
1215 /* Most instructions have the # of dwords (minus 2) in 7:0 */
1216 return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
1217 }
1218
1219 static int dump_mi_command(struct drm_printer *p,
1220 struct xe_gt *gt,
1221 u32 *dw,
1222 int remaining_dw)
1223 {
1224 u32 inst_header = *dw;
1225 u32 numdw = instr_dw(inst_header);
1226 u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
1227 int num_noop;
1228
1229 /* First check for commands that don't have/use a '# DW' field */
1230 switch (inst_header & MI_OPCODE) {
1231 case MI_NOOP:
1232 num_noop = 1;
1233 while (num_noop < remaining_dw &&
1234 (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
1235 num_noop++;
1236 drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
1237 return num_noop;
1238
1239 case MI_TOPOLOGY_FILTER:
1240 drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
1241 return 1;
1242
1243 case MI_BATCH_BUFFER_END:
1244 drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
1245 /* Return 'remaining_dw' to consume the rest of the LRC */
1246 return remaining_dw;
1247 }
1248
1249 /*
1250 * Any remaining commands include a # of dwords. We should make sure
1251 * it doesn't exceed the remaining size of the LRC.
1252 */
1253 if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1254 numdw = remaining_dw;
1255
1256 switch (inst_header & MI_OPCODE) {
1257 case MI_LOAD_REGISTER_IMM:
1258 drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
1259 inst_header, (numdw - 1) / 2);
1260 for (int i = 1; i < numdw; i += 2)
1261 drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
1262 return numdw;
1263
1264 case MI_LOAD_REGISTER_MEM & MI_OPCODE:
1265 drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
1266 inst_header,
1267 dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
1268 dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
1269 if (numdw == 4)
1270 drm_printf(p, " - %#6x = %#010llx\n",
1271 dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
1272 else
1273 drm_printf(p, " - %*ph (%s)\n",
1274 (int)sizeof(u32) * (numdw - 1), dw + 1,
1275 numdw < 4 ? "truncated" : "malformed");
1276 return numdw;
1277
1278 case MI_FORCE_WAKEUP:
1279 drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
1280 return numdw;
1281
1282 default:
1283 drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
1284 inst_header, opcode, numdw);
1285 return numdw;
1286 }
1287 }
1288
1289 static int dump_gfxpipe_command(struct drm_printer *p,
1290 struct xe_gt *gt,
1291 u32 *dw,
1292 int remaining_dw)
1293 {
1294 u32 numdw = instr_dw(*dw);
1295 u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
1296 u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
1297 u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
1298
1299 /*
1300 * Make sure we haven't mis-parsed a number of dwords that exceeds the
1301 * remaining size of the LRC.
1302 */
1303 if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1304 numdw = remaining_dw;
1305
1306 switch (*dw & GFXPIPE_MATCH_MASK) {
1307 #define MATCH(cmd) \
1308 case cmd: \
1309 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
1310 return numdw
1311 #define MATCH3D(cmd) \
1312 case CMD_##cmd: \
1313 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
1314 return numdw
1315
1316 MATCH(STATE_BASE_ADDRESS);
1317 MATCH(STATE_SIP);
1318 MATCH(GPGPU_CSR_BASE_ADDRESS);
1319 MATCH(STATE_COMPUTE_MODE);
1320 MATCH3D(3DSTATE_BTD);
1321 MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
1322 MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);
1323
1324 MATCH3D(3DSTATE_VF_STATISTICS);
1325
1326 MATCH(PIPELINE_SELECT);
1327
1328 MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
1329 MATCH3D(3DSTATE_CLEAR_PARAMS);
1330 MATCH3D(3DSTATE_DEPTH_BUFFER);
1331 MATCH3D(3DSTATE_STENCIL_BUFFER);
1332 MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
1333 MATCH3D(3DSTATE_VERTEX_BUFFERS);
1334 MATCH3D(3DSTATE_VERTEX_ELEMENTS);
1335 MATCH3D(3DSTATE_INDEX_BUFFER);
1336 MATCH3D(3DSTATE_VF);
1337 MATCH3D(3DSTATE_MULTISAMPLE);
1338 MATCH3D(3DSTATE_CC_STATE_POINTERS);
1339 MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
1340 MATCH3D(3DSTATE_VS);
1341 MATCH3D(3DSTATE_GS);
1342 MATCH3D(3DSTATE_CLIP);
1343 MATCH3D(3DSTATE_SF);
1344 MATCH3D(3DSTATE_WM);
1345 MATCH3D(3DSTATE_CONSTANT_VS);
1346 MATCH3D(3DSTATE_CONSTANT_GS);
1347 MATCH3D(3DSTATE_CONSTANT_PS);
1348 MATCH3D(3DSTATE_SAMPLE_MASK);
1349 MATCH3D(3DSTATE_CONSTANT_HS);
1350 MATCH3D(3DSTATE_CONSTANT_DS);
1351 MATCH3D(3DSTATE_HS);
1352 MATCH3D(3DSTATE_TE);
1353 MATCH3D(3DSTATE_DS);
1354 MATCH3D(3DSTATE_STREAMOUT);
1355 MATCH3D(3DSTATE_SBE);
1356 MATCH3D(3DSTATE_PS);
1357 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
1358 MATCH3D(3DSTATE_CPS_POINTERS);
1359 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
1360 MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
1361 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
1362 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
1363 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
1364 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
1365 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
1366 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
1367 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
1368 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
1369 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
1370 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
1371 MATCH3D(3DSTATE_VF_INSTANCING);
1372 MATCH3D(3DSTATE_VF_SGVS);
1373 MATCH3D(3DSTATE_VF_TOPOLOGY);
1374 MATCH3D(3DSTATE_WM_CHROMAKEY);
1375 MATCH3D(3DSTATE_PS_BLEND);
1376 MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
1377 MATCH3D(3DSTATE_PS_EXTRA);
1378 MATCH3D(3DSTATE_RASTER);
1379 MATCH3D(3DSTATE_SBE_SWIZ);
1380 MATCH3D(3DSTATE_WM_HZ_OP);
1381 MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
1382 MATCH3D(3DSTATE_VF_SGVS_2);
1383 MATCH3D(3DSTATE_VFG);
1384 MATCH3D(3DSTATE_URB_ALLOC_VS);
1385 MATCH3D(3DSTATE_URB_ALLOC_HS);
1386 MATCH3D(3DSTATE_URB_ALLOC_DS);
1387 MATCH3D(3DSTATE_URB_ALLOC_GS);
1388 MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
1389 MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
1390 MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
1391 MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
1392 MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
1393 MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
1394 MATCH3D(3DSTATE_AMFS);
1395 MATCH3D(3DSTATE_DEPTH_BOUNDS);
1396 MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
1397 MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
1398 MATCH3D(3DSTATE_MESH_CONTROL);
1399 MATCH3D(3DSTATE_MESH_DISTRIB);
1400 MATCH3D(3DSTATE_TASK_REDISTRIB);
1401 MATCH3D(3DSTATE_MESH_SHADER);
1402 MATCH3D(3DSTATE_MESH_SHADER_DATA);
1403 MATCH3D(3DSTATE_TASK_CONTROL);
1404 MATCH3D(3DSTATE_TASK_SHADER);
1405 MATCH3D(3DSTATE_TASK_SHADER_DATA);
1406 MATCH3D(3DSTATE_URB_ALLOC_MESH);
1407 MATCH3D(3DSTATE_URB_ALLOC_TASK);
1408 MATCH3D(3DSTATE_CLIP_MESH);
1409 MATCH3D(3DSTATE_SBE_MESH);
1410 MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
1411
1412 MATCH3D(3DSTATE_DRAWING_RECTANGLE);
1413 MATCH3D(3DSTATE_CHROMA_KEY);
1414 MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
1415 MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
1416 MATCH3D(3DSTATE_LINE_STIPPLE);
1417 MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
1418 MATCH3D(3DSTATE_MONOFILTER_SIZE);
1419 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
1420 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
1421 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
1422 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
1423 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
1424 MATCH3D(3DSTATE_SO_DECL_LIST);
1425 MATCH3D(3DSTATE_SO_BUFFER);
1426 MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
1427 MATCH3D(3DSTATE_SAMPLE_PATTERN);
1428 MATCH3D(3DSTATE_3D_MODE);
1429 MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
1430 MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
1431 MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
1432
1433 default:
1434 drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
1435 *dw, pipeline, opcode, subopcode, numdw);
1436 return numdw;
1437 }
1438 }
1439
1440 static int dump_gfx_state_command(struct drm_printer *p,
1441 struct xe_gt *gt,
1442 u32 *dw,
1443 int remaining_dw)
1444 {
1445 u32 numdw = instr_dw(*dw);
1446 u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
1447
1448 /*
1449 * Make sure we haven't mis-parsed a number of dwords that exceeds the
1450 * remaining size of the LRC.
1451 */
1452 if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1453 numdw = remaining_dw;
1454
1455 switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
1456 MATCH(STATE_WRITE_INLINE);
1457
1458 default:
1459 drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
1460 *dw, opcode, numdw);
1461 return numdw;
1462 }
1463 }
1464
1465 void xe_lrc_dump_default(struct drm_printer *p,
1466 struct xe_gt *gt,
1467 enum xe_engine_class hwe_class)
1468 {
1469 u32 *dw;
1470 int remaining_dw, num_dw;
1471
1472 if (!gt->default_lrc[hwe_class]) {
1473 drm_printf(p, "No default LRC for class %d\n", hwe_class);
1474 return;
1475 }
1476
1477 /*
1478 * Skip the beginning of the LRC since it contains the per-process
1479 * hardware status page.
1480 */
1481 dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
1482 remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
1483
1484 while (remaining_dw > 0) {
1485 if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
1486 num_dw = dump_mi_command(p, gt, dw, remaining_dw);
1487 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
1488 num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
1489 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
1490 num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
1491 } else {
1492 num_dw = min(instr_dw(*dw), remaining_dw);
1493 drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
1494 *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
1495 num_dw);
1496 }
1497
1498 dw += num_dw;
1499 remaining_dw -= num_dw;
1500 }
1501 }
1502
1503 struct instr_state {
1504 u32 instr;
1505 u16 num_dw;
1506 };
1507
1508 static const struct instr_state xe_hpg_svg_state[] = {
1509 { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
1510 { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
1511 { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
1512 { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
1513 { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
1514 { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
1515 { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
1516 { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
1517 { .instr = CMD_3DSTATE_VS, .num_dw = 9 },
1518 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
1519 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
1520 { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
1521 { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
1522 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
1523 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
1524 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
1525 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
1526 { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
1527 { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
1528 { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
1529 { .instr = CMD_3DSTATE_SF, .num_dw = 4 },
1530 { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
1531 { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
1532 { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
1533 { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
1534 { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
1535 { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
1536 { .instr = CMD_3DSTATE_HS, .num_dw = 9 },
1537 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
1538 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
1539 { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
1540 { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
1541 { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
1542 { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
1543 { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
1544 { .instr = CMD_3DSTATE_TE, .num_dw = 5 },
1545 { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
1546 { .instr = CMD_3DSTATE_DS, .num_dw = 11 },
1547 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
1548 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
1549 { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
1550 { .instr = CMD_3DSTATE_GS, .num_dw = 10 },
1551 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
1552 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
1553 { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
1554 { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
1555 { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
1556 { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
1557 { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
1558 { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
1559 };
1560
1561 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
1562 {
1563 struct xe_gt *gt = q->hwe->gt;
1564 struct xe_device *xe = gt_to_xe(gt);
1565 const struct instr_state *state_table = NULL;
1566 int state_table_size = 0;
1567
1568 /*
1569 * Wa_14019789679
1570 *
1571 * If the driver doesn't explicitly emit the SVG instructions while
1572 * setting up the default LRC, the context switch will write 0's
1573 * (noops) into the LRC memory rather than the expected instruction
1574 * headers. Application contexts start out as a copy of the default
1575 * LRC, and if they also do not emit specific settings for some SVG
1576 * state, then on context restore they'll unintentionally inherit
1577 * whatever state setting the previous context had programmed into the
1578 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
1579 * prevent the hardware from resetting that state back to any specific
1580 * value).
1581 *
1582 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
1583 * since that's a specific state setting that can easily cause GPU
1584 * hangs if unintentionally inherited. However to be safe we'll
1585 * continue to emit all of the SVG state since it's best not to leak
1586 * any of the state between contexts, even if that leakage is harmless.
1587 */
1588 if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
1589 state_table = xe_hpg_svg_state;
1590 state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
1591 }
1592
1593 if (!state_table) {
1594 xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
1595 GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
1596 return;
1597 }
1598
1599 for (int i = 0; i < state_table_size; i++) {
1600 u32 instr = state_table[i].instr;
1601 u16 num_dw = state_table[i].num_dw;
1602 bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
1603
1604 xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
1605 xe_gt_assert(gt, num_dw != 0);
1606 xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
1607
1608 /*
1609 * Xe2's SVG context is the same as the one on DG2 / MTL
1610 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
1611 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
1612 * Just make the replacement here rather than defining a
1613 * whole separate table for the single trivial change.
1614 */
1615 if (GRAPHICS_VER(xe) >= 20 &&
1616 instr == CMD_3DSTATE_DRAWING_RECTANGLE)
1617 instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
1618
1619 bb->cs[bb->len] = instr;
1620 if (!is_single_dw)
1621 bb->cs[bb->len] |= (num_dw - 2);
1622
1623 bb->len += num_dw;
1624 }
1625 }
1626
1627 struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
1628 {
1629 struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
1630
1631 if (!snapshot)
1632 return NULL;
1633
1634 if (lrc->bo->vm)
1635 xe_vm_get(lrc->bo->vm);
1636
1637 snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
1638 snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
1639 snapshot->head = xe_lrc_ring_head(lrc);
1640 snapshot->tail.internal = lrc->ring.tail;
1641 snapshot->tail.memory = xe_lrc_ring_tail(lrc);
1642 snapshot->start_seqno = xe_lrc_start_seqno(lrc);
1643 snapshot->seqno = xe_lrc_seqno(lrc);
1644 snapshot->lrc_bo = xe_bo_get(lrc->bo);
1645 snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
1646 snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
1647 snapshot->lrc_snapshot = NULL;
1648 snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
1649 snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
1650 return snapshot;
1651 }
1652
1653 void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
1654 {
1655 struct xe_bo *bo;
1656 struct xe_vm *vm;
1657 struct iosys_map src;
1658
1659 if (!snapshot)
1660 return;
1661
1662 bo = snapshot->lrc_bo;
1663 vm = bo->vm;
1664 snapshot->lrc_bo = NULL;
1665
1666 snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
1667 if (!snapshot->lrc_snapshot)
1668 goto put_bo;
1669
1670 xe_bo_lock(bo, false);
1671 if (!ttm_bo_vmap(&bo->ttm, &src)) {
1672 xe_map_memcpy_from(xe_bo_device(bo),
1673 snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
1674 snapshot->lrc_size);
1675 ttm_bo_vunmap(&bo->ttm, &src);
1676 } else {
1677 kvfree(snapshot->lrc_snapshot);
1678 snapshot->lrc_snapshot = NULL;
1679 }
1680 xe_bo_unlock(bo);
1681 put_bo:
1682 xe_bo_put(bo);
1683 if (vm)
1684 xe_vm_put(vm);
1685 }
1686
1687 void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
1688 {
1689 unsigned long i;
1690
1691 if (!snapshot)
1692 return;
1693
1694 drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
1695 drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
1696 snapshot->indirect_context_desc);
1697 drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
1698 drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
1699 snapshot->tail.internal, snapshot->tail.memory);
1700 drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
1701 drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
1702 drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
1703 drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
1704
1705 if (!snapshot->lrc_snapshot)
1706 return;
1707
1708 drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
1709 drm_puts(p, "\t[HWSP].data: ");
1710 for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
1711 u32 *val = snapshot->lrc_snapshot + i;
1712 char dumped[ASCII85_BUFSZ];
1713
1714 drm_puts(p, ascii85_encode(*val, dumped));
1715 }
1716
1717 drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
1718 drm_puts(p, "\t[HWCTX].data: ");
1719 for (; i < snapshot->lrc_size; i += sizeof(u32)) {
1720 u32 *val = snapshot->lrc_snapshot + i;
1721 char dumped[ASCII85_BUFSZ];
1722
1723 drm_puts(p, ascii85_encode(*val, dumped));
1724 }
1725 drm_puts(p, "\n");
1726 }
1727
1728 void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
1729 {
1730 if (!snapshot)
1731 return;
1732
1733 kvfree(snapshot->lrc_snapshot);
1734 if (snapshot->lrc_bo) {
1735 struct xe_vm *vm;
1736
1737 vm = snapshot->lrc_bo->vm;
1738 xe_bo_put(snapshot->lrc_bo);
1739 if (vm)
1740 xe_vm_put(vm);
1741 }
1742 kfree(snapshot);
1743 }
1744
1745 /**
1746 * xe_lrc_update_timestamp() - Update ctx timestamp
1747 * @lrc: Pointer to the lrc.
1748 * @old_ts: Old timestamp value
1749 *
1750 * Populate @old_ts with the current saved ctx timestamp, read the new ctx
1751 * timestamp and update the saved value.
1752 *
1753 * Returns: New ctx timestamp value
1754 */
1755 u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
1756 {
1757 *old_ts = lrc->ctx_timestamp;
1758
1759 lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
1760
1761 return lrc->ctx_timestamp;
1762 }
1763