// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
#include "xe_wa.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_PPHWSP_SIZE				SZ_4K
#define LRC_INDIRECT_CTX_BO_SIZE		SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K
#define LRC_WA_BB_SIZE				SZ_4K

/*
 * Layout of the LRC and associated data allocated as
 * lrc->bo:
 *
 * Region                       Size
 * +============================+=================================+ <- __xe_lrc_ring_offset()
 * | Ring                       | ring_size, see                  |
 * |                            | xe_lrc_init()                   |
 * +============================+=================================+ <- __xe_lrc_pphwsp_offset()
 * | PPHWSP (includes SW state) | 4K                              |
 * +----------------------------+---------------------------------+ <- __xe_lrc_regs_offset()
 * | Engine Context Image       | n * 4K, see                     |
 * |                            | xe_gt_lrc_size()                |
 * +----------------------------+---------------------------------+ <- __xe_lrc_indirect_ring_offset()
 * | Indirect Ring State Page   | 0 or 4k, see                    |
 * |                            | XE_LRC_FLAG_INDIRECT_RING_STATE |
 * +============================+=================================+ <- __xe_lrc_indirect_ctx_offset()
 * | Indirect Context Page      | 0 or 4k, see                    |
 * |                            | XE_LRC_FLAG_INDIRECT_CTX        |
 * +============================+=================================+ <- __xe_lrc_wa_bb_offset()
 * | WA BB Per Ctx              | 4k                              |
 * +============================+=================================+ <- xe_bo_size(lrc->bo)
 */
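
/*
 * Illustrative example (values assumed, not taken from any specific
 * platform): with ring_size = SZ_16K and neither optional page present,
 * the ring occupies [0, 16K), the PPHWSP starts at 16K, the context image
 * registers start at 16K + LRC_PPHWSP_SIZE, and the WA BB occupies the
 * final LRC_WA_BB_SIZE bytes, i.e. offset xe_bo_size(lrc->bo) - SZ_4K.
 */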

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
        return gt_to_xe(lrc->fence_ctx.gt);
}

static bool
gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
{
        return false;
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
        struct xe_device *xe = gt_to_xe(gt);
        size_t size;

        /* Per-process HW status page (PPHWSP) */
        size = LRC_PPHWSP_SIZE;

        /* Engine context image */
        switch (class) {
        case XE_ENGINE_CLASS_RENDER:
                if (GRAPHICS_VER(xe) >= 20)
                        size += 3 * SZ_4K;
                else
                        size += 13 * SZ_4K;
                break;
        case XE_ENGINE_CLASS_COMPUTE:
                if (GRAPHICS_VER(xe) >= 20)
                        size += 2 * SZ_4K;
                else
                        size += 13 * SZ_4K;
                break;
        default:
                WARN(1, "Unknown engine class: %d", class);
                fallthrough;
        case XE_ENGINE_CLASS_COPY:
        case XE_ENGINE_CLASS_VIDEO_DECODE:
        case XE_ENGINE_CLASS_VIDEO_ENHANCE:
        case XE_ENGINE_CLASS_OTHER:
                size += 1 * SZ_4K;
        }

        /* Add indirect ring state page */
        if (xe_gt_has_indirect_ring_state(gt))
                size += LRC_INDIRECT_RING_STATE_SIZE;

        return size;
}
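
/*
 * Worked example (illustrative): on a GRAPHICS_VER >= 20 platform with
 * indirect ring state, a render-class LRC works out to 4K (PPHWSP) +
 * 3 * 4K (engine context image) + 4K (indirect ring state) = 20K.
 */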

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - number of NOPs are set in lower bits
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, use additional bytes, to
 *      follow, for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
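
/*
 * A minimal worked example of the encoding (hypothetical table, not one of
 * the per-platform tables below):
 *
 *      static const u8 example_offsets[] = {
 *              NOP(1),         // skip one dword in @regs
 *              LRI(2, POSTED), // MI_LOAD_REGISTER_IMM, 2 regs, force-posted
 *              REG(0x034),     // decodes to hwe->mmio_base + 0x34
 *              REG16(0x2b4),   // two bytes, since 0x2b4 >= 0x200; decodes
 *                              // to hwe->mmio_base + 0x2b4
 *              0               // terminator
 *      };
 *
 * set_offsets() would emit the LRI header, then the two register offsets
 * (leaving the value dwords untouched), and finally MI_BATCH_BUFFER_END.
 */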
static void set_offsets(u32 *regs,
                        const u8 *data,
                        const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
                           BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
        (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
        (((x) >> 2) & 0x7f)
{
        const u32 base = hwe->mmio_base;

        while (*data) {
                u8 count, flags;

                if (*data & BIT(7)) { /* skip */
                        count = *data++ & ~BIT(7);
                        regs += count;
                        continue;
                }

                count = *data & 0x3f;
                flags = *data >> 6;
                data++;

                *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
                if (flags & POSTED)
                        *regs |= MI_LRI_FORCE_POSTED;
                *regs |= MI_LRI_LRM_CS_MMIO;
                regs++;

                xe_gt_assert(hwe->gt, count);
                do {
                        u32 offset = 0;
                        u8 v;

                        do {
                                v = *data++;
                                offset <<= 7;
                                offset |= v & ~BIT(7);
                        } while (v & BIT(7));

                        regs[0] = base + (offset << 2);
                        regs += 2;
                } while (--count);
        }

        *regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        0
};

static const u8 dg2_xcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),
        REG(0x120),
        REG(0x124),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        0
};

static const u8 gen12_rcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(3, POSTED),
        REG(0x1b0),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),
        NOP(3 + 9 + 1),

        LRI(51, POSTED),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG(0x028),
        REG(0x09c),
        REG(0x0c0),
        REG(0x178),
        REG(0x17c),
        REG16(0x358),
        REG(0x170),
        REG(0x150),
        REG(0x154),
        REG(0x158),
        REG16(0x41c),
        REG16(0x600),
        REG16(0x604),
        REG16(0x608),
        REG16(0x60c),
        REG16(0x610),
        REG16(0x614),
        REG16(0x618),
        REG16(0x61c),
        REG16(0x620),
        REG16(0x624),
        REG16(0x628),
        REG16(0x62c),
        REG16(0x630),
        REG16(0x634),
        REG16(0x638),
        REG16(0x63c),
        REG16(0x640),
        REG16(0x644),
        REG16(0x648),
        REG16(0x64c),
        REG16(0x650),
        REG16(0x654),
        REG16(0x658),
        REG16(0x65c),
        REG16(0x660),
        REG16(0x664),
        REG16(0x668),
        REG16(0x66c),
        REG16(0x670),
        REG16(0x674),
        REG16(0x678),
        REG16(0x67c),
        REG(0x068),
        REG(0x084),
        NOP(1),

        0
};

static const u8 xehp_rcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(3, POSTED),
        REG(0x1b0),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),

        0
};

static const u8 dg2_rcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),
        REG(0x120),
        REG(0x124),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(3, POSTED),
        REG(0x1b0),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),

        0
};

static const u8 mtl_rcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),
        REG(0x120),
        REG(0x124),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(2),
        LRI(2, POSTED),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),

        0
};

#define XE2_CTX_COMMON \
        NOP(1),                 /* [0x00] */ \
        LRI(15, POSTED),        /* [0x01] */ \
        REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
        REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
        REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
        REG(0x038),             /* [0x08] RING_BUFFER_START */ \
        REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
        REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
        REG(0x140),             /* [0x0e] BB_ADDR */ \
        REG(0x110),             /* [0x10] BB_STATE */ \
        REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
        REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
        REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
        REG(0x180),             /* [0x18] CCID */ \
        REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
        REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
        REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
        \
        NOP(1),                 /* [0x20] */ \
        LRI(9, POSTED),         /* [0x21] */ \
        REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
        REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
        REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
        REG16(0x284),           /* [0x28] dummy reg */ \
        REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
        REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
        REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
        REG16(0x274),           /* [0x30] PTBP_UDW */ \
        REG16(0x270)            /* [0x32] PTBP_LDW */
static const u8 xe2_rcs_offsets[] = {
        XE2_CTX_COMMON,

        NOP(2),                 /* [0x34] */
        LRI(2, POSTED),         /* [0x36] */
        REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
        REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

        NOP(6),                 /* [0x3b] */
        LRI(1, 0),              /* [0x41] */
        REG(0x0c8),             /* [0x42] R_PWR_CLK_STATE */

        0
};

static const u8 xe2_bcs_offsets[] = {
        XE2_CTX_COMMON,

        NOP(4 + 8 + 1),         /* [0x34] */
        LRI(2, POSTED),         /* [0x41] */
        REG16(0x200),           /* [0x42] BCS_SWCTRL */
        REG16(0x204),           /* [0x44] BLIT_CCTL */

        0
};

static const u8 xe2_xcs_offsets[] = {
        XE2_CTX_COMMON,

        0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
        NOP(1),                 /* [0x00] */
        LRI(5, POSTED),         /* [0x01] */
        REG(0x034),             /* [0x02] RING_BUFFER_HEAD */
        REG(0x030),             /* [0x04] RING_BUFFER_TAIL */
        REG(0x038),             /* [0x06] RING_BUFFER_START */
        REG(0x048),             /* [0x08] RING_BUFFER_START_UDW */
        REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */

        NOP(5),                 /* [0x0c] */
        LRI(9, POSTED),         /* [0x11] */
        REG(0x168),             /* [0x12] BB_ADDR_UDW */
        REG(0x140),             /* [0x14] BB_ADDR */
        REG(0x110),             /* [0x16] BB_STATE */
        REG16(0x588),           /* [0x18] BB_STACK_WRITE_PORT */
        REG16(0x588),           /* [0x1a] BB_STACK_WRITE_PORT */
        REG16(0x588),           /* [0x1c] BB_STACK_WRITE_PORT */
        REG16(0x588),           /* [0x1e] BB_STACK_WRITE_PORT */
        REG16(0x588),           /* [0x20] BB_STACK_WRITE_PORT */
        REG16(0x588),           /* [0x22] BB_STACK_WRITE_PORT */

        NOP(12),                /* [0x24] */

        0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
        if (class == XE_ENGINE_CLASS_RENDER) {
                if (GRAPHICS_VER(xe) >= 20)
                        return xe2_rcs_offsets;
                else if (GRAPHICS_VERx100(xe) >= 1270)
                        return mtl_rcs_offsets;
                else if (GRAPHICS_VERx100(xe) >= 1255)
                        return dg2_rcs_offsets;
                else if (GRAPHICS_VERx100(xe) >= 1250)
                        return xehp_rcs_offsets;
                else
                        return gen12_rcs_offsets;
        } else if (class == XE_ENGINE_CLASS_COPY) {
                if (GRAPHICS_VER(xe) >= 20)
                        return xe2_bcs_offsets;
                else
                        return gen12_xcs_offsets;
        } else {
                if (GRAPHICS_VER(xe) >= 20)
                        return xe2_xcs_offsets;
                else if (GRAPHICS_VERx100(xe) >= 1255)
                        return dg2_xcs_offsets;
                else
                        return gen12_xcs_offsets;
        }
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
        regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
                                                       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

        if (xe_gt_has_indirect_ring_state(hwe->gt))
                regs[CTX_CONTEXT_CONTROL] |=
                        _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
}
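
/*
 * CTX_CONTEXT_CONTROL is a "masked" register: the upper 16 bits of each
 * written dword select which of the lower 16 bits take effect. As a rough
 * sketch of the helpers used above (see the real definitions for the
 * authoritative versions):
 *
 *      _MASKED_BIT_ENABLE(bit)  ~= ((bit) << 16) | (bit)       // set bit
 *      _MASKED_BIT_DISABLE(bit) ~= ((bit) << 16)               // clear bit
 */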

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
        struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
        struct xe_device *xe = gt_to_xe(hwe->gt);
        u8 num_regs;

        if (!xe_device_uses_memirq(xe))
                return;

        regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
                                        MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
        regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
        regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

        num_regs = xe_device_has_msix(xe) ? 3 : 2;
        regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
                                       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
        regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
        regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
        regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
        regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);

        if (xe_device_has_msix(xe)) {
                regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
                /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
        }
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
        struct xe_device *xe = gt_to_xe(hwe->gt);

        if (GRAPHICS_VERx100(xe) >= 1250)
                return 0x70;
        else
                return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
        int x;

        x = lrc_ring_mi_mode(hwe);
        regs[x + 1] &= ~STOP_RING;
        regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
        return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
        return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
        return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_ENGINE_ID_PPHWSP_OFFSET 1024
#define LRC_PARALLEL_PPHWSP_OFFSET 2048

u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
        return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
        if (GRAPHICS_VERx100(xe) >= 1250)
                return 96 * sizeof(u32);
        else
                return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
        return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
        /* The seqno is stored in the driver-defined portion of PPHWSP */
        return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
        /* The start seqno is stored in the driver-defined portion of PPHWSP */
        return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
        /* This is stored in the driver-defined portion of PPHWSP */
        return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
        /* The parallel is stored in the driver-defined portion of PPHWSP */
        return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc)
{
        return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
        return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
{
        return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
        u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE -
                     LRC_INDIRECT_RING_STATE_SIZE;

        if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX)
                offset -= LRC_INDIRECT_CTX_BO_SIZE;

        return offset;
}

static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc)
{
        return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE;
}

static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc)
{
        return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
        struct iosys_map map = lrc->bo->vmap; \
\
        xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
        iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
        return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
        return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
DECL_MAP_ADDR_HELPERS(engine_id)

#undef DECL_MAP_ADDR_HELPERS

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
        return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp udw GGTT address
 */
u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc)
{
        return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
        struct xe_device *xe = lrc_to_xe(lrc);
        struct iosys_map map;
        u32 ldw, udw = 0;

        map = __xe_lrc_ctx_timestamp_map(lrc);
        ldw = xe_map_read32(xe, &map);

        if (xe->info.has_64bit_timestamp) {
                map = __xe_lrc_ctx_timestamp_udw_map(lrc);
                udw = xe_map_read32(xe, &map);
        }

        return (u64)udw << 32 | ldw;
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
        return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
        struct xe_device *xe = lrc_to_xe(lrc);
        struct iosys_map map;

        map = __xe_lrc_ctx_job_timestamp_map(lrc);
        return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
        return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
        if (!xe_lrc_has_indirect_ring_state(lrc))
                return 0;

        return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
        struct xe_device *xe = lrc_to_xe(lrc);
        struct iosys_map map;

        map = __xe_lrc_indirect_ring_map(lrc);
        iosys_map_incr(&map, reg_nr * sizeof(u32));
        return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
                                          int reg_nr, u32 val)
{
        struct xe_device *xe = lrc_to_xe(lrc);
        struct iosys_map map;

        map = __xe_lrc_indirect_ring_map(lrc);
        iosys_map_incr(&map, reg_nr * sizeof(u32));
        xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
        struct xe_device *xe = lrc_to_xe(lrc);
        struct iosys_map map;

        map = __xe_lrc_regs_map(lrc);
        iosys_map_incr(&map, reg_nr * sizeof(u32));
        return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
        struct xe_device *xe = lrc_to_xe(lrc);
        struct iosys_map map;

        map = __xe_lrc_regs_map(lrc);
        iosys_map_incr(&map, reg_nr * sizeof(u32));
        xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
        struct xe_gt *gt = hwe->gt;
        void *data;
        u32 *regs;

        data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
        if (!data)
                return NULL;

        /* 1st page: Per-Process of HW status Page */
        regs = data + LRC_PPHWSP_SIZE;
        set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
        set_context_control(regs, hwe);
        set_memory_based_intr(regs, hwe);
        reset_stop_ring(regs, hwe);
        if (xe_gt_has_indirect_ring_state(gt)) {
                regs = data + xe_gt_lrc_size(gt, hwe->class) -
                       LRC_INDIRECT_RING_STATE_SIZE;
                set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
        }

        return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
        u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));

        xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
        xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
        xe_hw_fence_ctx_finish(&lrc->fence_ctx);
        xe_bo_unpin_map_no_vm(lrc->bo);
}

/*
 * setup_utilization_wa() - Write commands to wa bb to assist
 * in calculating active context run ticks.
 *
 * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
 * context, but only gets updated when the context switches out. In order to
 * check how long a context has been active before it switches out, two things
 * are required:
 *
 * (1) Determine if the context is running:
 * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in
 * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is
 * initialized. During a query, we just check for this value to determine if the
 * context is active. If the context switched out, it would overwrite this
 * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as
 * the last part of context restore, so reusing this LRC location will not
 * clobber anything.
 *
 * (2) Calculate the time that the context has been active for:
 * The CTX_TIMESTAMP ticks only when the context is active. If a context is
 * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization.
 * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific
 * engine instance. Since we do not know which instance the context is running
 * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and
 * store it in the PPHWSP.
 */
#define CONTEXT_ACTIVE 1ULL
static ssize_t setup_utilization_wa(struct xe_lrc *lrc,
                                    struct xe_hw_engine *hwe,
                                    u32 *batch,
                                    size_t max_len)
{
        u32 *cmd = batch;

        if (xe_gt_WARN_ON(lrc->gt, max_len < 12))
                return -ENOSPC;

        *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
        *cmd++ = ENGINE_ID(0).addr;
        *cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc);
        *cmd++ = 0;

        *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
        *cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
        *cmd++ = 0;
        *cmd++ = lower_32_bits(CONTEXT_ACTIVE);

        if (lrc_to_xe(lrc)->info.has_64bit_timestamp) {
                *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
                *cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
                *cmd++ = 0;
                *cmd++ = upper_32_bits(CONTEXT_ACTIVE);
        }

        return cmd - batch;
}
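
/*
 * A query-side sketch of how the CONTEXT_ACTIVE marker is consumed
 * (hypothetical helper, not part of this file's API): if the saved
 * CTX_TIMESTAMP in the LRC still reads back as CONTEXT_ACTIVE, the context
 * has not switched out since restore, so the caller should instead sample
 * the live CTX_TIMESTAMP MMIO of the engine identified by the ENGINE_ID
 * value stashed in the PPHWSP above.
 *
 *      static bool example_lrc_is_active(struct xe_lrc *lrc)
 *      {
 *              return xe_lrc_ctx_timestamp(lrc) == CONTEXT_ACTIVE;
 *      }
 */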

struct bo_setup {
        ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
                         u32 *batch, size_t max_size);
};

struct bo_setup_state {
        /* Input: */
        struct xe_lrc *lrc;
        struct xe_hw_engine *hwe;
        size_t max_size;
        size_t reserve_dw;
        unsigned int offset;
        const struct bo_setup *funcs;
        unsigned int num_funcs;

        /* State: */
        u32 *buffer;
        u32 *ptr;
        unsigned int written;
};

static int setup_bo(struct bo_setup_state *state)
{
        ssize_t remain;

        if (state->lrc->bo->vmap.is_iomem) {
                state->buffer = kmalloc(state->max_size, GFP_KERNEL);
                if (!state->buffer)
                        return -ENOMEM;
                state->ptr = state->buffer;
        } else {
                state->ptr = state->lrc->bo->vmap.vaddr + state->offset;
                state->buffer = NULL;
        }

        remain = state->max_size / sizeof(u32);

        for (size_t i = 0; i < state->num_funcs; i++) {
                ssize_t len = state->funcs[i].setup(state->lrc, state->hwe,
                                                    state->ptr, remain);

                remain -= len;

                /*
                 * Caller has asked for at least reserve_dw to remain unused.
                 */
                if (len < 0 ||
                    xe_gt_WARN_ON(state->lrc->gt, remain < state->reserve_dw))
                        goto fail;

                state->ptr += len;
                state->written += len;
        }

        return 0;

fail:
        kfree(state->buffer);
        return -ENOSPC;
}

static void finish_bo(struct bo_setup_state *state)
{
        if (!state->buffer)
                return;

        xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap,
                         state->offset, state->buffer,
                         state->written * sizeof(u32));
        kfree(state->buffer);
}

static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
        static const struct bo_setup funcs[] = {
                { .setup = setup_utilization_wa },
        };
        struct bo_setup_state state = {
                .lrc = lrc,
                .hwe = hwe,
                .max_size = LRC_WA_BB_SIZE,
                .reserve_dw = 1,
                .offset = __xe_lrc_wa_bb_offset(lrc),
                .funcs = funcs,
                .num_funcs = ARRAY_SIZE(funcs),
        };
        int ret;

        ret = setup_bo(&state);
        if (ret)
                return ret;

        *state.ptr++ = MI_BATCH_BUFFER_END;
        state.written++;

        finish_bo(&state);

        xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
                             xe_bo_ggtt_addr(lrc->bo) + state.offset + 1);

        return 0;
}

static int
setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
        static struct bo_setup rcs_funcs[] = {
        };
        struct bo_setup_state state = {
                .lrc = lrc,
                .hwe = hwe,
                .max_size = (63 * 64) /* max 63 cachelines */,
                .offset = __xe_lrc_indirect_ctx_offset(lrc),
        };
        int ret;

        if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX))
                return 0;

        if (hwe->class == XE_ENGINE_CLASS_RENDER ||
            hwe->class == XE_ENGINE_CLASS_COMPUTE) {
                state.funcs = rcs_funcs;
                state.num_funcs = ARRAY_SIZE(rcs_funcs);
        }

        if (xe_gt_WARN_ON(lrc->gt, !state.funcs))
                return 0;

        ret = setup_bo(&state);
        if (ret)
                return ret;

        /*
         * Align to 64B cacheline so there's no garbage at the end for CS to
         * execute: size for indirect ctx must be a multiple of 64.
         */
        while (state.written & 0xf) {
                *state.ptr++ = MI_NOOP;
                state.written++;
        }

        finish_bo(&state);

        xe_lrc_write_ctx_reg(lrc,
                             CTX_CS_INDIRECT_CTX,
                             (xe_bo_ggtt_addr(lrc->bo) + state.offset) |
                             /* Size in CLs. */
                             (state.written * sizeof(u32) / 64));
        xe_lrc_write_ctx_reg(lrc,
                             CTX_CS_INDIRECT_CTX_OFFSET,
                             CTX_INDIRECT_CTX_OFFSET_DEFAULT);

        return 0;
}

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
                       struct xe_vm *vm, u32 ring_size, u16 msix_vec,
                       u32 init_flags)
{
        struct xe_gt *gt = hwe->gt;
        const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class);
        u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE;
        struct xe_tile *tile = gt_to_tile(gt);
        struct xe_device *xe = gt_to_xe(gt);
        struct iosys_map map;
        u32 arb_enable;
        u32 bo_flags;
        int err;

        kref_init(&lrc->refcount);
        lrc->gt = gt;
        lrc->size = lrc_size;
        lrc->flags = 0;
        lrc->ring.size = ring_size;
        lrc->ring.tail = 0;

        if (gt_engine_needs_indirect_ctx(gt, hwe->class)) {
                lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX;
                bo_size += LRC_INDIRECT_CTX_BO_SIZE;
        }

        if (xe_gt_has_indirect_ring_state(gt))
                lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

        bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
                   XE_BO_FLAG_GGTT_INVALIDATE;
        if (vm && vm->xef) /* userspace */
                bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;

        lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
                                       ttm_bo_type_kernel,
                                       bo_flags);
        if (IS_ERR(lrc->bo))
                return PTR_ERR(lrc->bo);

        xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
                             hwe->fence_irq, hwe->name);

        /*
         * Init Per-Process of HW status Page, LRC / context state to known
         * values. If there's already a primed default_lrc, just copy it, otherwise
         * it's the early submission to record the lrc: build a new empty one from
         * scratch.
         */
        map = __xe_lrc_pphwsp_map(lrc);
        if (gt->default_lrc[hwe->class]) {
                xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
                xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
                                 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
                                 lrc_size - LRC_PPHWSP_SIZE);
        } else {
                void *init_data = empty_lrc_data(hwe);

                if (!init_data) {
                        err = -ENOMEM;
                        goto err_lrc_finish;
                }

                xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size);
                kfree(init_data);
        }

        if (vm) {
                xe_lrc_set_ppgtt(lrc, vm);

                if (vm->xef)
                        xe_drm_client_add_bo(vm->xef->client, lrc->bo);
        }

        if (xe_device_has_msix(xe)) {
                xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
                                     xe_memirq_status_ptr(&tile->memirq, hwe));
                xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
                                     xe_memirq_source_ptr(&tile->memirq, hwe));
                xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
        }

        if (xe_gt_has_indirect_ring_state(gt)) {
                xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
                                     __xe_lrc_indirect_ring_ggtt_addr(lrc));

                xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
                                              __xe_lrc_ring_ggtt_addr(lrc));
                xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
                xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
                xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
                xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
                                              RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
        } else {
                xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
                xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
                xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
                xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
                                     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
        }

        if (init_flags & XE_LRC_CREATE_RUNALONE)
                xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
                                     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
                                     _MASKED_BIT_ENABLE(CTX_CTRL_RUN_ALONE));

        if (init_flags & XE_LRC_CREATE_PXP)
                xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
                                     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
                                     _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));

        lrc->ctx_timestamp = 0;
        xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
        if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
                xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);

        if (xe->info.has_asid && vm)
                xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid);

        lrc->desc = LRC_VALID;
        lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
        /* TODO: Priority */

        /* While this appears to have something about privileged batches or
         * some such, it really just means PPGTT mode.
         */
        if (vm)
                lrc->desc |= LRC_PRIVILEGE;

        if (GRAPHICS_VERx100(xe) < 1250) {
                lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
                lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
        }

        arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

        map = __xe_lrc_seqno_map(lrc);
        xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

        map = __xe_lrc_start_seqno_map(lrc);
        xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

        err = setup_wa_bb(lrc, hwe);
        if (err)
                goto err_lrc_finish;

        err = setup_indirect_ctx(lrc, hwe);
        if (err)
                goto err_lrc_finish;

        return 0;

err_lrc_finish:
        xe_lrc_finish(lrc);
        return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 * @msix_vec: MSI-X interrupt vector (for platforms that support it)
 * @flags: LRC initialization flags
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return pointer to created LRC upon success and an error pointer
 * upon failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
                             u32 ring_size, u16 msix_vec, u32 flags)
{
        struct xe_lrc *lrc;
        int err;

        lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
        if (!lrc)
                return ERR_PTR(-ENOMEM);

        err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags);
        if (err) {
                kfree(lrc);
                return ERR_PTR(err);
        }

        return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
        struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

        xe_lrc_finish(lrc);
        kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
        if (xe_lrc_has_indirect_ring_state(lrc))
                xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
        else
                xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
        if (xe_lrc_has_indirect_ring_state(lrc))
                return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
        else
                return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
{
        if (xe_lrc_has_indirect_ring_state(lrc))
                return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
        else
                return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
        if (xe_lrc_has_indirect_ring_state(lrc))
                xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
        else
                xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
        if (xe_lrc_has_indirect_ring_state(lrc))
                return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
        else
                return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
        const u32 head = xe_lrc_ring_head(lrc);
        const u32 tail = lrc->ring.tail;
        const u32 size = lrc->ring.size;

        return ((head - tail - 1) & (size - 1)) + 1;
}
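
/*
 * Worked example (illustrative): with size = 4096, head = 512 and
 * tail = 1024, space = ((512 - 1024 - 1) & 4095) + 1 = 3584 bytes.
 * The "- 1 ... + 1" arrangement makes head == tail report a fully empty
 * ring (size bytes free) rather than 0.
 */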

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
                                const void *data, size_t size)
{
        struct xe_device *xe = lrc_to_xe(lrc);

        iosys_map_incr(&ring, lrc->ring.tail);
        xe_map_memcpy_to(xe, &ring, 0, data, size);
        lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
        struct xe_device *xe = lrc_to_xe(lrc);
        struct iosys_map ring;
        u32 rhs;
        size_t aligned_size;

        xe_assert(xe, IS_ALIGNED(size, 4));
        aligned_size = ALIGN(size, 8);

        ring = __xe_lrc_ring_map(lrc);

        xe_assert(xe, lrc->ring.tail < lrc->ring.size);
        rhs = lrc->ring.size - lrc->ring.tail;
        if (size > rhs) {
                __xe_lrc_write_ring(lrc, ring, data, rhs);
                __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
        } else {
                __xe_lrc_write_ring(lrc, ring, data, size);
        }

        if (aligned_size > size) {
                u32 noop = MI_NOOP;

                __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
        }
}
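
/*
 * Wrap-around example (illustrative): with ring.size = 4096 and
 * ring.tail = 4088, writing 16 bytes stores the first 8 bytes at the end
 * of the ring and the remaining 8 at offset 0, leaving ring.tail = 8.
 */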

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
        return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
        return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
        return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
        xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
        xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
        struct iosys_map map = __xe_lrc_seqno_map(lrc);

        return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
        struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

        return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
        return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
        return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
        return __xe_lrc_parallel_map(lrc);
}
/**
 * xe_lrc_engine_id() - Read engine id value
 * @lrc: Pointer to the lrc.
 *
 * Returns: engine id value
 */
static u32 xe_lrc_engine_id(struct xe_lrc *lrc)
{
        struct xe_device *xe = lrc_to_xe(lrc);
        struct iosys_map map;

        map = __xe_lrc_engine_id_map(lrc);
        return xe_map_read32(xe, &map);
}

static int instr_dw(u32 cmd_header)
{
        /* GFXPIPE "SINGLE_DW" opcodes are a single dword */
        if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
            GFXPIPE_SINGLE_DW_CMD(0, 0))
                return 1;

        /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
        if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
                return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

        /* Most instructions have the # of dwords (minus 2) in 7:0 */
        return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}
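
/*
 * Example (illustrative): a header whose XE_INSTR_LEN_MASK field holds 2
 * describes an instruction of 2 + 2 = 4 dwords total, since the length
 * field does not count the first two dwords of the instruction.
 */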

static int dump_mi_command(struct drm_printer *p,
                           struct xe_gt *gt,
                           u32 *dw,
                           int remaining_dw)
{
        u32 inst_header = *dw;
        u32 numdw = instr_dw(inst_header);
        u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
        int num_noop;

        /* First check for commands that don't have/use a '# DW' field */
        switch (inst_header & MI_OPCODE) {
        case MI_NOOP:
                num_noop = 1;
                while (num_noop < remaining_dw &&
                       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
                        num_noop++;
                drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
                return num_noop;

        case MI_TOPOLOGY_FILTER:
                drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
                return 1;

        case MI_BATCH_BUFFER_END:
                drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
                /* Return 'remaining_dw' to consume the rest of the LRC */
                return remaining_dw;
        }

        /*
         * Any remaining commands include a # of dwords. We should make sure
         * it doesn't exceed the remaining size of the LRC.
         */
        if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
                numdw = remaining_dw;

        switch (inst_header & MI_OPCODE) {
        case MI_LOAD_REGISTER_IMM:
                drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
                           inst_header, (numdw - 1) / 2);
                for (int i = 1; i < numdw; i += 2)
                        drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
                return numdw;

        case MI_LOAD_REGISTER_MEM & MI_OPCODE:
                drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
                           inst_header,
                           dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
                           dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
                if (numdw == 4)
                        drm_printf(p, " - %#6x = %#010llx\n",
                                   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
                else
                        drm_printf(p, " - %*ph (%s)\n",
                                   (int)sizeof(u32) * (numdw - 1), dw + 1,
                                   numdw < 4 ? "truncated" : "malformed");
                return numdw;

        case MI_FORCE_WAKEUP:
                drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
                return numdw;

        default:
                drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
                           inst_header, opcode, numdw);
                return numdw;
        }
}

static int dump_gfxpipe_command(struct drm_printer *p,
                                struct xe_gt *gt,
                                u32 *dw,
                                int remaining_dw)
{
        u32 numdw = instr_dw(*dw);
        u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
        u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
        u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

        /*
         * Make sure we haven't mis-parsed a number of dwords that exceeds the
         * remaining size of the LRC.
         */
        if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
                numdw = remaining_dw;

        switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
        case cmd: \
                drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
                return numdw
#define MATCH3D(cmd) \
        case CMD_##cmd: \
                drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
                return numdw

        MATCH(STATE_BASE_ADDRESS);
        MATCH(STATE_SIP);
        MATCH(GPGPU_CSR_BASE_ADDRESS);
        MATCH(STATE_COMPUTE_MODE);
        MATCH3D(3DSTATE_BTD);
        MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
        MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

        MATCH3D(3DSTATE_VF_STATISTICS);

        MATCH(PIPELINE_SELECT);

        MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
        MATCH3D(3DSTATE_CLEAR_PARAMS);
        MATCH3D(3DSTATE_DEPTH_BUFFER);
        MATCH3D(3DSTATE_STENCIL_BUFFER);
        MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
        MATCH3D(3DSTATE_VERTEX_BUFFERS);
        MATCH3D(3DSTATE_VERTEX_ELEMENTS);
        MATCH3D(3DSTATE_INDEX_BUFFER);
        MATCH3D(3DSTATE_VF);
        MATCH3D(3DSTATE_MULTISAMPLE);
        MATCH3D(3DSTATE_CC_STATE_POINTERS);
        MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
        MATCH3D(3DSTATE_VS);
        MATCH3D(3DSTATE_GS);
        MATCH3D(3DSTATE_CLIP);
        MATCH3D(3DSTATE_SF);
        MATCH3D(3DSTATE_WM);
        MATCH3D(3DSTATE_CONSTANT_VS);
        MATCH3D(3DSTATE_CONSTANT_GS);
        MATCH3D(3DSTATE_CONSTANT_PS);
        MATCH3D(3DSTATE_SAMPLE_MASK);
        MATCH3D(3DSTATE_CONSTANT_HS);
        MATCH3D(3DSTATE_CONSTANT_DS);
        MATCH3D(3DSTATE_HS);
        MATCH3D(3DSTATE_TE);
        MATCH3D(3DSTATE_DS);
        MATCH3D(3DSTATE_STREAMOUT);
        MATCH3D(3DSTATE_SBE);
        MATCH3D(3DSTATE_PS);
        MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
        MATCH3D(3DSTATE_CPS_POINTERS);
        MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
        MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
        MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
        MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
        MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
        MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
        MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
        MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
        MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
        MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
        MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
        MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
        MATCH3D(3DSTATE_VF_INSTANCING);
        MATCH3D(3DSTATE_VF_SGVS);
        MATCH3D(3DSTATE_VF_TOPOLOGY);
        MATCH3D(3DSTATE_WM_CHROMAKEY);
        MATCH3D(3DSTATE_PS_BLEND);
        MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
        MATCH3D(3DSTATE_PS_EXTRA);
        MATCH3D(3DSTATE_RASTER);
        MATCH3D(3DSTATE_SBE_SWIZ);
        MATCH3D(3DSTATE_WM_HZ_OP);
        MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
        MATCH3D(3DSTATE_VF_SGVS_2);
        MATCH3D(3DSTATE_VFG);
        MATCH3D(3DSTATE_URB_ALLOC_VS);
        MATCH3D(3DSTATE_URB_ALLOC_HS);
        MATCH3D(3DSTATE_URB_ALLOC_DS);
        MATCH3D(3DSTATE_URB_ALLOC_GS);
        MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
        MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
        MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
        MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
        MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
        MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
        MATCH3D(3DSTATE_AMFS);
        MATCH3D(3DSTATE_DEPTH_BOUNDS);
        MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
        MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
        MATCH3D(3DSTATE_MESH_CONTROL);
        MATCH3D(3DSTATE_MESH_DISTRIB);
        MATCH3D(3DSTATE_TASK_REDISTRIB);
        MATCH3D(3DSTATE_MESH_SHADER);
        MATCH3D(3DSTATE_MESH_SHADER_DATA);
        MATCH3D(3DSTATE_TASK_CONTROL);
        MATCH3D(3DSTATE_TASK_SHADER);
        MATCH3D(3DSTATE_TASK_SHADER_DATA);
        MATCH3D(3DSTATE_URB_ALLOC_MESH);
        MATCH3D(3DSTATE_URB_ALLOC_TASK);
        MATCH3D(3DSTATE_CLIP_MESH);
        MATCH3D(3DSTATE_SBE_MESH);
        MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
        MATCH3D(3DSTATE_COARSE_PIXEL);

        MATCH3D(3DSTATE_DRAWING_RECTANGLE);
        MATCH3D(3DSTATE_CHROMA_KEY);
        MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
        MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
        MATCH3D(3DSTATE_LINE_STIPPLE);
        MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
        MATCH3D(3DSTATE_MONOFILTER_SIZE);
        MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
        MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
        MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
        MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
        MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
        MATCH3D(3DSTATE_SO_DECL_LIST);
        MATCH3D(3DSTATE_SO_BUFFER);
        MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
        MATCH3D(3DSTATE_SAMPLE_PATTERN);
        MATCH3D(3DSTATE_3D_MODE);
        MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
        MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
        MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

        default:
                drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
                           *dw, pipeline, opcode, subopcode, numdw);
                return numdw;
        }
}

static int dump_gfx_state_command(struct drm_printer *p,
                                  struct xe_gt *gt,
                                  u32 *dw,
                                  int remaining_dw)
{
        u32 numdw = instr_dw(*dw);
        u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

        /*
         * Make sure we haven't mis-parsed a number of dwords that exceeds the
         * remaining size of the LRC.
         */
        if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
                numdw = remaining_dw;

        switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
        MATCH(STATE_WRITE_INLINE);

        default:
                drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
                           *dw, opcode, numdw);
                return numdw;
        }
}

void xe_lrc_dump_default(struct drm_printer *p,
                         struct xe_gt *gt,
                         enum xe_engine_class hwe_class)
{
        u32 *dw;
        int remaining_dw, num_dw;

        if (!gt->default_lrc[hwe_class]) {
                drm_printf(p, "No default LRC for class %d\n", hwe_class);
                return;
        }

        /*
         * Skip the beginning of the LRC since it contains the per-process
         * hardware status page.
         */
        dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
        remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

        while (remaining_dw > 0) {
                if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
                        num_dw = dump_mi_command(p, gt, dw, remaining_dw);
                } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
                        num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
                } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
                        num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
                } else {
                        num_dw = min(instr_dw(*dw), remaining_dw);
                        drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
                                   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
                                   num_dw);
                }

                dw += num_dw;
                remaining_dw -= num_dw;
        }
}

struct instr_state {
        u32 instr;
        u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
        { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
        { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
        { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
        { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
        { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
        { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
        { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
        { .instr = CMD_3DSTATE_VS, .num_dw = 9 },
        { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
        { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
        { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
        { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
        { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
        { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
        { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
        { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
        { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
        { .instr = CMD_3DSTATE_SF, .num_dw = 4 },
        { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
        { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
        { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
        { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
        { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
        { .instr = CMD_3DSTATE_HS, .num_dw = 9 },
        { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
        { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
        { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
        { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
        { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
        { .instr = CMD_3DSTATE_TE, .num_dw = 5 },
        { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
        { .instr = CMD_3DSTATE_DS, .num_dw = 11 },
        { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
        { .instr = CMD_3DSTATE_GS, .num_dw = 10 },
        { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
        { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
        { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
        { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
        { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
        { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};
1914
u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * Wa_14019789679
	 *
	 * If the driver doesn't explicitly emit the SVG instructions while
	 * setting up the default LRC, the context switch will write 0's
	 * (noops) into the LRC memory rather than the expected instruction
	 * headers. Application contexts start out as a copy of the default
	 * LRC, and if they also do not emit specific settings for some SVG
	 * state, then on context restore they'll unintentionally inherit
	 * whatever state setting the previous context had programmed into the
	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
	 * prevent the hardware from resetting that state back to any specific
	 * value).
	 *
	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
	 * since that's a specific state setting that can easily cause GPU
	 * hangs if unintentionally inherited. However, to be safe, we'll
	 * continue to emit all of the SVG state since it's best not to leak
	 * any of the state between contexts, even if that leakage is harmless.
	 */
	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
	}

	if (!state_table) {
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return cs;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		*cs = instr;
		if (!is_single_dw)
			*cs |= (num_dw - 2);

		cs += num_dw;
	}

	return cs;
}
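
/*
 * Worked example for the header encoding above: the DWord Length field of
 * a multi-dword GFXPIPE command is biased by two, hence the "num_dw - 2".
 * For the { .instr = CMD_3DSTATE_VS, .num_dw = 9 } entry in
 * xe_hpg_svg_state, the emitted header dword is CMD_3DSTATE_VS | (9 - 2),
 * and "cs" then advances past the eight body dwords, which are left with
 * whatever (presumed zeroed) contents the buffer already had.
 */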

struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start = xe_lrc_ring_start(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->size;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}
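
/*
 * Lifecycle sketch (illustrative): capture runs where sleeping is
 * undesirable (hence GFP_NOWAIT and no BO contents yet), the heavyweight
 * copy is deferred to xe_lrc_snapshot_capture_delayed() below, and
 * printing/freeing happen later, e.g. from a coredump worker:
 *
 *	struct xe_lrc_snapshot *s = xe_lrc_snapshot_capture(lrc);
 *
 *	...
 *	xe_lrc_snapshot_capture_delayed(s);	(may sleep: GFP_KERNEL, vmap)
 *	xe_lrc_snapshot_print(s, &p);
 *	xe_lrc_snapshot_free(s);
 */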

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Ring address: 0x%08x\n",
		   snapshot->ring_addr);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}
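
/*
 * Decoding sketch (illustrative, userspace-side): each u32 above is
 * encoded per <linux/ascii85.h>, i.e. "z" for a zero dword, otherwise
 * five base-85 digits offset from '!' with the most significant digit
 * first. A matching decoder for one group could look like:
 *
 *	u32 v = 0;
 *
 *	if (*s == 'z') {
 *		s++;
 *		return 0;
 *	}
 *	for (int i = 0; i < 5; i++)
 *		v = v * 85 + (*s++ - '!');
 *	return v;
 */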

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo)
		xe_bo_put(snapshot->lrc_bo);

	kfree(snapshot);
}

static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
{
	u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
	u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
	struct xe_hw_engine *hwe;
	u64 val;

	hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
	if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
			    "Unexpected engine class:instance %d:%d for context utilization\n",
			    class, instance))
		return -1;

	if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
		val = xe_mmio_read64_2x32(&hwe->gt->mmio,
					  RING_CTX_TIMESTAMP(hwe->mmio_base));
	else
		val = xe_mmio_read32(&hwe->gt->mmio,
				     RING_CTX_TIMESTAMP(hwe->mmio_base));

	*reg_ctx_ts = val;

	return 0;
}

/**
 * xe_lrc_update_timestamp() - Update ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Old timestamp value
 *
 * Populate @old_ts with the current saved ctx timestamp, read the new ctx
 * timestamp and update the saved value. With support for active contexts,
 * the calculation may be slightly racy, so follow a read-again logic to
 * ensure that the context is still active before returning the right
 * timestamp.
 *
 * Returns: New ctx timestamp value
 */
u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
{
	u64 lrc_ts, reg_ts;
	u32 engine_id;

	*old_ts = lrc->ctx_timestamp;

	lrc_ts = xe_lrc_ctx_timestamp(lrc);
	/* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
	if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
		lrc->ctx_timestamp = lrc_ts;
		goto done;
	}

	if (lrc_ts == CONTEXT_ACTIVE) {
		engine_id = xe_lrc_engine_id(lrc);
		if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
			lrc->ctx_timestamp = reg_ts;

		/* read lrc again to ensure context is still active */
		lrc_ts = xe_lrc_ctx_timestamp(lrc);
	}

	/*
	 * If context switched out, just use the lrc_ts. Note that this needs to
	 * be a separate if condition.
	 */
	if (lrc_ts != CONTEXT_ACTIVE)
		lrc->ctx_timestamp = lrc_ts;

done:
	trace_xe_lrc_update_timestamp(lrc, *old_ts);

	return lrc->ctx_timestamp;
}
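
/*
 * Usage sketch (illustrative): utilization accounting in callers is
 * assumed to accumulate the delta between successive readings:
 *
 *	u64 old_ts;
 *	u64 new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
 *
 *	run_ticks += new_ts - old_ts;	("run_ticks" is a hypothetical
 *					 per-queue accumulator)
 */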

/**
 * xe_lrc_ring_is_idle() - LRC is idle
 * @lrc: Pointer to the lrc.
 *
 * Compare LRC ring head and tail to determine if idle.
 *
 * Return: True if the ring is idle, False otherwise
 */
bool xe_lrc_ring_is_idle(struct xe_lrc *lrc)
{
	return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc);
}
