// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
#include "xe_wa.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size = 4 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			size = 3 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size = 2 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * register offsets and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allow setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to set values for in the
 *        case of MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: when a register offset needs more than 6 bits, use additional bytes,
 *      which follow, for the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
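
/*
 * An illustrative decode sketch (not part of the encoding spec above): the
 * start of gen12_xcs_offsets below, NOP(1), LRI(13, POSTED), REG16(0x244),
 * encodes to the bytes 0x81, 0x4d, 0x81, 0x11, ... and is walked as:
 *
 *   0x81       -> bit [7] set: skip 1 dword in @regs
 *   0x4d       -> LRI: count = 13 (bits [5:0]), POSTED (bit [6]) set, so
 *                 emit MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(13) with
 *                 MI_LRI_FORCE_POSTED
 *   0x81, 0x11 -> first byte has bit [7] set, so accumulate 7 bits per byte:
 *                 offset = (0x01 << 7) | 0x11 = 0x91, and the register
 *                 address becomes base + (0x91 << 2) = base + 0x244
 */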
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(5, POSTED),		/* [0x01] */
	REG(0x034),		/* [0x02] RING_BUFFER_HEAD */
	REG(0x030),		/* [0x04] RING_BUFFER_TAIL */
	REG(0x038),		/* [0x06] RING_BUFFER_START */
	REG(0x048),		/* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),			/* [0x0c] */
	LRI(9, POSTED),		/* [0x11] */
	REG(0x168),		/* [0x12] BB_ADDR_UDW */
	REG(0x140),		/* [0x14] BB_ADDR */
	REG(0x110),		/* [0x16] BB_STATE */
	REG16(0x588),		/* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x22] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x24] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x26] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x28] BB_STACK_WRITE_PORT */

	NOP(12),		/* [0x00] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);
	u8 num_regs;

	if (!xe_device_uses_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	num_regs = xe_device_has_msix(xe) ? 3 : 2;
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);

	if (xe_device_has_msix(xe)) {
		regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
		/* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
	}
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K
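
/*
 * A sketch of the driver-defined region of the PPHWSP implied by the offsets
 * above (byte offsets from the start of the PPHWSP):
 *
 *   [0, 0x200): hardware-owned portion of the PPHWSP
 *   0x200:      seqno (see __xe_lrc_seqno_offset)
 *   0x208:      start seqno
 *   0x210:      ctx job timestamp
 *   0x800:      parallel scratch area
 */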

u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* The ctx job timestamp is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel scratch area is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* The indirect ring state page is at the very end of the LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

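/*
 * Putting the offset helpers above together, a sketch of the overall layout
 * of the LRC buffer object (sizes vary by platform and ring_size):
 *
 *   0:                       ring (lrc->ring.size bytes)
 *   ring.size:               PPHWSP (LRC_PPHWSP_SIZE)
 *   ring.size + PPHWSP size: context registers / remaining context image
 *   lrc->size - SZ_4K:       indirect ring state page, if supported, at the
 *                            very end of the LRC
 */
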
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process HW Status Page (PPHWSP) */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size, u16 msix_vec)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process HW Status Page (PPHWSP) and LRC / context state to
	 * known values.
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_device_has_msix(xe)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
				     xe_memirq_status_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
				     xe_memirq_source_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/*
	 * While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 * @msix_vec: MSI-X interrupt vector (for platforms that support it)
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return pointer to created LRC upon success and an error pointer
 * upon failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size, u16 msix_vec)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}
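
/*
 * A quick sanity check of the ring-space formula above (assuming, as the
 * masking implies, that lrc->ring.size is a power of two): with an empty
 * ring (head == tail) the subtraction wraps (head - tail - 1) to size - 1,
 * so the full size is reported as free; with 8 bytes pending
 * (tail == head + 8) it reports size - 8.
 */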

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}
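
/*
 * For reference, the descriptor built above packs, per the LRC_* masks at the
 * top of this file: bit 0 = valid, bits [4:3] = addressing mode (legacy 64b
 * context), bit 8 = "privilege" (i.e. PPGTT mode), and, on pre-12.50
 * platforms, bits [53:48] = engine instance and bits [63:61] = engine class,
 * all OR'ed with the PPHWSP GGTT address from xe_lrc_ggtt_addr().
 */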

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}
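
/*
 * For example (applying only the generic rule above, not any specific
 * command's definition): a header whose XE_INSTR_LEN_MASK field reads 2
 * describes a 4-dword instruction, since most instructions encode
 * "total dwords - 2" in bits 7:0.
 */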

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords. We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_CONSTANT_PS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * Wa_14019789679
	 *
	 * If the driver doesn't explicitly emit the SVG instructions while
	 * setting up the default LRC, the context switch will write 0's
	 * (noops) into the LRC memory rather than the expected instruction
	 * headers. Application contexts start out as a copy of the default
	 * LRC, and if they also do not emit specific settings for some SVG
	 * state, then on context restore they'll unintentionally inherit
	 * whatever state setting the previous context had programmed into the
	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
	 * prevent the hardware from resetting that state back to any specific
	 * value).
	 *
	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
	 * since that's a specific state setting that can easily cause GPU
	 * hangs if unintentionally inherited. However to be safe we'll
	 * continue to emit all of the SVG state since it's best not to leak
	 * any of the state between contexts, even if that leakage is harmless.
	 */
	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
	}

	if (!state_table) {
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}

struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	if (lrc->bo->vm)
		xe_vm_get(lrc->bo->vm);

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start = xe_lrc_ring_start(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct xe_vm *vm;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	vm = bo->vm;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
	if (vm)
		xe_vm_put(vm);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Ring address: 0x%08x\n",
		   snapshot->ring_addr);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo) {
		struct xe_vm *vm;

		vm = snapshot->lrc_bo->vm;
		xe_bo_put(snapshot->lrc_bo);
		if (vm)
			xe_vm_put(vm);
	}
	kfree(snapshot);
}

/**
 * xe_lrc_update_timestamp() - Update ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Old timestamp value
 *
 * Populate @old_ts with the current saved ctx timestamp, read the new ctx
 * timestamp and update the saved value.
 *
 * Returns: New ctx timestamp value
 */
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
{
	*old_ts = lrc->ctx_timestamp;

	lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);

	trace_xe_lrc_update_timestamp(lrc, *old_ts);

	return lrc->ctx_timestamp;
}

/**
 * xe_lrc_ring_is_idle() - LRC is idle
 * @lrc: Pointer to the lrc.
 *
 * Compare LRC ring head and tail to determine if idle.
 *
 * Return: True if the ring is idle, False otherwise
 */
bool xe_lrc_ring_is_idle(struct xe_lrc *lrc)
{
	return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc);
}