// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gpu_commands.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

struct xe_lrc_snapshot {
	struct xe_bo *lrc_bo;
	void *lrc_snapshot;
	unsigned long lrc_size, lrc_offset;

	u32 context_desc;
	u32 head;
	struct {
		u32 internal;
		u32 memory;
	} tail;
	u32 start_seqno;
	u32 seqno;
};

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			return 4 * SZ_4K;
		else
			return 14 * SZ_4K;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			return 3 * SZ_4K;
		else
			return 14 * SZ_4K;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		return 2 * SZ_4K;
	}
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * commands and register offsets in @regs. The following encoding is used for
 * each byte. There are two steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allows setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: the number of NOPs, or the number of registers to set values for in
 *        the case of MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command by
 * "count" number of registers. They are set by using the REG/REG16 macros:
 * the former is used for offsets smaller than 0x200 while the latter is for
 * values bigger than that. Those macros already set all the bits documented
 * below correctly:
 *
 * [7]: when a register offset needs more than 6 bits, use additional bytes,
 *      to follow, for the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
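 *
 * Worked example, taken from the first entries of the gen12 tables below:
 * NOP(1), LRI(13, POSTED), REG16(0x244) encodes to the bytes 0x81, 0x4d,
 * 0x81, 0x11.  0x81 = BIT(7) | 1 skips one dword; 0x4d = (1 << 6) | 13
 * emits an MI_LOAD_REGISTER_IMM with a register count of 13 and
 * MI_LRI_FORCE_POSTED set; the pair 0x81, 0x11 then rebuilds the register
 * offset seven bits at a time, (0x01 << 7) | 0x11 = 0x91, which shifted
 * left by two gives mmio_base + 0x244.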
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K
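
/*
 * Resulting BO layout, sketched from the offset helpers below (ring_size is
 * the value handed to xe_lrc_init()):
 *
 *   [0 .. ring_size)             ring buffer
 *   [ring_size .. +SZ_4K)        PPHWSP
 *       +512   seqno
 *       +520   start seqno
 *       +2048  parallel scratch area
 *   [ring_size + SZ_4K .. end)   register state context
 */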

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel scratch area is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	void *data;
	u32 *regs;

	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process HW Status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
				       ring_size + xe_lrc_size(xe, hwe->class),
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Initialize the Per-Process HW Status Page and the LRC / context
	 * state to known values.
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/*
	 * While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
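	 *
	 * Together with LRC_VALID and the addressing mode set above,
	 * LRC_PRIVILEGE forms the low dword of the context descriptor;
	 * xe_lrc_descriptor() later ORs in the GGTT address of the context
	 * image.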
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		/* Wrap: split the copy at the end of the ring buffer */
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		/* Pad dword-sized writes to a qword boundary with an MI_NOOP */
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
{
	return &xe_hw_fence_create(&lrc->fence_ctx,
				   __xe_lrc_seqno_map(lrc))->dma;
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}
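
/*
 * Default-LRC dumping follows.  instr_dw() derives an instruction's length
 * in dwords from its header so that xe_lrc_dump_default() can walk the
 * command stream one instruction at a time, dispatching to the
 * per-command-type dump helpers below.
 */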
static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords. We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_CONSTANT_PS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
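	 * The register state being dumped starts LRC_PPHWSP_SIZE bytes into
	 * the default image.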
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * At the moment we only need to emit non-register state for the RCS
	 * engine.
	 */
	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	switch (GRAPHICS_VERx100(xe)) {
	case 1255:
	case 1270 ... 2004:
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
		break;
	default:
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}

struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	snapshot->context_desc = lower_32_bits(xe_lrc_ggtt_addr(lrc));
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_snapshot = NULL;
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	dma_resv_lock(bo->ttm.base.resv, NULL);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	dma_resv_unlock(bo->ttm.base.resv);
put_bo:
	xe_bo_put(bo);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo)
		xe_bo_put(snapshot->lrc_bo);
	kfree(snapshot);
}