1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_lrc.h" 7 8 #include "instructions/xe_mi_commands.h" 9 #include "instructions/xe_gfxpipe_commands.h" 10 #include "regs/xe_engine_regs.h" 11 #include "regs/xe_gpu_commands.h" 12 #include "regs/xe_lrc_layout.h" 13 #include "xe_bb.h" 14 #include "xe_bo.h" 15 #include "xe_device.h" 16 #include "xe_drm_client.h" 17 #include "xe_exec_queue_types.h" 18 #include "xe_gt.h" 19 #include "xe_gt_printk.h" 20 #include "xe_hw_fence.h" 21 #include "xe_map.h" 22 #include "xe_memirq.h" 23 #include "xe_sriov.h" 24 #include "xe_vm.h" 25 26 #define LRC_VALID (1 << 0) 27 #define LRC_PRIVILEGE (1 << 8) 28 #define LRC_ADDRESSING_MODE_SHIFT 3 29 #define LRC_LEGACY_64B_CONTEXT 3 30 31 #define ENGINE_CLASS_SHIFT 61 32 #define ENGINE_INSTANCE_SHIFT 48 33 34 static struct xe_device * 35 lrc_to_xe(struct xe_lrc *lrc) 36 { 37 return gt_to_xe(lrc->fence_ctx.gt); 38 } 39 40 size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) 41 { 42 switch (class) { 43 case XE_ENGINE_CLASS_RENDER: 44 if (GRAPHICS_VER(xe) >= 20) 45 return 4 * SZ_4K; 46 else 47 return 14 * SZ_4K; 48 case XE_ENGINE_CLASS_COMPUTE: 49 /* 14 pages since graphics_ver == 11 */ 50 if (GRAPHICS_VER(xe) >= 20) 51 return 3 * SZ_4K; 52 else 53 return 14 * SZ_4K; 54 default: 55 WARN(1, "Unknown engine class: %d", class); 56 fallthrough; 57 case XE_ENGINE_CLASS_COPY: 58 case XE_ENGINE_CLASS_VIDEO_DECODE: 59 case XE_ENGINE_CLASS_VIDEO_ENHANCE: 60 case XE_ENGINE_CLASS_OTHER: 61 return 2 * SZ_4K; 62 } 63 } 64 65 /* 66 * The per-platform tables are u8-encoded in @data. Decode @data and set the 67 * addresses' offset and commands in @regs. The following encoding is used 68 * for each byte. There are 2 steps: decoding commands and decoding addresses. 69 * 70 * Commands: 71 * [7]: create NOPs - number of NOPs are set in lower bits 72 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set 73 * MI_LRI_FORCE_POSTED 74 * [5:0]: Number of NOPs or registers to set values to in case of 75 * MI_LOAD_REGISTER_IMM 76 * 77 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" 78 * number of registers. They are set by using the REG/REG16 macros: the former 79 * is used for offsets smaller than 0x200 while the latter is for values bigger 80 * than that. Those macros already set all the bits documented below correctly: 81 * 82 * [7]: When a register offset needs more than 6 bits, use additional bytes, to 83 * follow, for the lower bits 84 * [6:0]: Register offset, without considering the engine base. 85 * 86 * This function only tweaks the commands and register offsets. Values are not 87 * filled out. 88 */ 89 static void set_offsets(u32 *regs, 90 const u8 *data, 91 const struct xe_hw_engine *hwe) 92 #define NOP(x) (BIT(7) | (x)) 93 #define LRI(count, flags) ((flags) << 6 | (count) | \ 94 BUILD_BUG_ON_ZERO(count >= BIT(6))) 95 #define POSTED BIT(0) 96 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) 97 #define REG16(x) \ 98 (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ 99 (((x) >> 2) & 0x7f) 100 #define END 0 101 { 102 const u32 base = hwe->mmio_base; 103 104 while (*data) { 105 u8 count, flags; 106 107 if (*data & BIT(7)) { /* skip */ 108 count = *data++ & ~BIT(7); 109 regs += count; 110 continue; 111 } 112 113 count = *data & 0x3f; 114 flags = *data >> 6; 115 data++; 116 117 *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); 118 if (flags & POSTED) 119 *regs |= MI_LRI_FORCE_POSTED; 120 *regs |= MI_LRI_LRM_CS_MMIO; 121 regs++; 122 123 xe_gt_assert(hwe->gt, count); 124 do { 125 u32 offset = 0; 126 u8 v; 127 128 do { 129 v = *data++; 130 offset <<= 7; 131 offset |= v & ~BIT(7); 132 } while (v & BIT(7)); 133 134 regs[0] = base + (offset << 2); 135 regs += 2; 136 } while (--count); 137 } 138 139 *regs = MI_BATCH_BUFFER_END | BIT(0); 140 } 141 142 static const u8 gen12_xcs_offsets[] = { 143 NOP(1), 144 LRI(13, POSTED), 145 REG16(0x244), 146 REG(0x034), 147 REG(0x030), 148 REG(0x038), 149 REG(0x03c), 150 REG(0x168), 151 REG(0x140), 152 REG(0x110), 153 REG(0x1c0), 154 REG(0x1c4), 155 REG(0x1c8), 156 REG(0x180), 157 REG16(0x2b4), 158 159 NOP(5), 160 LRI(9, POSTED), 161 REG16(0x3a8), 162 REG16(0x28c), 163 REG16(0x288), 164 REG16(0x284), 165 REG16(0x280), 166 REG16(0x27c), 167 REG16(0x278), 168 REG16(0x274), 169 REG16(0x270), 170 171 END 172 }; 173 174 static const u8 dg2_xcs_offsets[] = { 175 NOP(1), 176 LRI(15, POSTED), 177 REG16(0x244), 178 REG(0x034), 179 REG(0x030), 180 REG(0x038), 181 REG(0x03c), 182 REG(0x168), 183 REG(0x140), 184 REG(0x110), 185 REG(0x1c0), 186 REG(0x1c4), 187 REG(0x1c8), 188 REG(0x180), 189 REG16(0x2b4), 190 REG(0x120), 191 REG(0x124), 192 193 NOP(1), 194 LRI(9, POSTED), 195 REG16(0x3a8), 196 REG16(0x28c), 197 REG16(0x288), 198 REG16(0x284), 199 REG16(0x280), 200 REG16(0x27c), 201 REG16(0x278), 202 REG16(0x274), 203 REG16(0x270), 204 205 END 206 }; 207 208 static const u8 gen12_rcs_offsets[] = { 209 NOP(1), 210 LRI(13, POSTED), 211 REG16(0x244), 212 REG(0x034), 213 REG(0x030), 214 REG(0x038), 215 REG(0x03c), 216 REG(0x168), 217 REG(0x140), 218 REG(0x110), 219 REG(0x1c0), 220 REG(0x1c4), 221 REG(0x1c8), 222 REG(0x180), 223 REG16(0x2b4), 224 225 NOP(5), 226 LRI(9, POSTED), 227 REG16(0x3a8), 228 REG16(0x28c), 229 REG16(0x288), 230 REG16(0x284), 231 REG16(0x280), 232 REG16(0x27c), 233 REG16(0x278), 234 REG16(0x274), 235 REG16(0x270), 236 237 LRI(3, POSTED), 238 REG(0x1b0), 239 REG16(0x5a8), 240 REG16(0x5ac), 241 242 NOP(6), 243 LRI(1, 0), 244 REG(0x0c8), 245 NOP(3 + 9 + 1), 246 247 LRI(51, POSTED), 248 REG16(0x588), 249 REG16(0x588), 250 REG16(0x588), 251 REG16(0x588), 252 REG16(0x588), 253 REG16(0x588), 254 REG(0x028), 255 REG(0x09c), 256 REG(0x0c0), 257 REG(0x178), 258 REG(0x17c), 259 REG16(0x358), 260 REG(0x170), 261 REG(0x150), 262 REG(0x154), 263 REG(0x158), 264 REG16(0x41c), 265 REG16(0x600), 266 REG16(0x604), 267 REG16(0x608), 268 REG16(0x60c), 269 REG16(0x610), 270 REG16(0x614), 271 REG16(0x618), 272 REG16(0x61c), 273 REG16(0x620), 274 REG16(0x624), 275 REG16(0x628), 276 REG16(0x62c), 277 REG16(0x630), 278 REG16(0x634), 279 REG16(0x638), 280 REG16(0x63c), 281 REG16(0x640), 282 REG16(0x644), 283 REG16(0x648), 284 REG16(0x64c), 285 REG16(0x650), 286 REG16(0x654), 287 REG16(0x658), 288 REG16(0x65c), 289 REG16(0x660), 290 REG16(0x664), 291 REG16(0x668), 292 REG16(0x66c), 293 REG16(0x670), 294 REG16(0x674), 295 REG16(0x678), 296 REG16(0x67c), 297 REG(0x068), 298 REG(0x084), 299 NOP(1), 300 301 END 302 }; 303 304 static const u8 xehp_rcs_offsets[] = { 305 NOP(1), 306 LRI(13, POSTED), 307 REG16(0x244), 308 REG(0x034), 309 REG(0x030), 310 REG(0x038), 311 REG(0x03c), 312 REG(0x168), 313 REG(0x140), 314 REG(0x110), 315 REG(0x1c0), 316 REG(0x1c4), 317 REG(0x1c8), 318 REG(0x180), 319 REG16(0x2b4), 320 321 NOP(5), 322 LRI(9, POSTED), 323 REG16(0x3a8), 324 REG16(0x28c), 325 REG16(0x288), 326 REG16(0x284), 327 REG16(0x280), 328 REG16(0x27c), 329 REG16(0x278), 330 REG16(0x274), 331 REG16(0x270), 332 333 LRI(3, POSTED), 334 REG(0x1b0), 335 REG16(0x5a8), 336 REG16(0x5ac), 337 338 NOP(6), 339 LRI(1, 0), 340 REG(0x0c8), 341 342 END 343 }; 344 345 static const u8 dg2_rcs_offsets[] = { 346 NOP(1), 347 LRI(15, POSTED), 348 REG16(0x244), 349 REG(0x034), 350 REG(0x030), 351 REG(0x038), 352 REG(0x03c), 353 REG(0x168), 354 REG(0x140), 355 REG(0x110), 356 REG(0x1c0), 357 REG(0x1c4), 358 REG(0x1c8), 359 REG(0x180), 360 REG16(0x2b4), 361 REG(0x120), 362 REG(0x124), 363 364 NOP(1), 365 LRI(9, POSTED), 366 REG16(0x3a8), 367 REG16(0x28c), 368 REG16(0x288), 369 REG16(0x284), 370 REG16(0x280), 371 REG16(0x27c), 372 REG16(0x278), 373 REG16(0x274), 374 REG16(0x270), 375 376 LRI(3, POSTED), 377 REG(0x1b0), 378 REG16(0x5a8), 379 REG16(0x5ac), 380 381 NOP(6), 382 LRI(1, 0), 383 REG(0x0c8), 384 385 END 386 }; 387 388 static const u8 mtl_rcs_offsets[] = { 389 NOP(1), 390 LRI(15, POSTED), 391 REG16(0x244), 392 REG(0x034), 393 REG(0x030), 394 REG(0x038), 395 REG(0x03c), 396 REG(0x168), 397 REG(0x140), 398 REG(0x110), 399 REG(0x1c0), 400 REG(0x1c4), 401 REG(0x1c8), 402 REG(0x180), 403 REG16(0x2b4), 404 REG(0x120), 405 REG(0x124), 406 407 NOP(1), 408 LRI(9, POSTED), 409 REG16(0x3a8), 410 REG16(0x28c), 411 REG16(0x288), 412 REG16(0x284), 413 REG16(0x280), 414 REG16(0x27c), 415 REG16(0x278), 416 REG16(0x274), 417 REG16(0x270), 418 419 NOP(2), 420 LRI(2, POSTED), 421 REG16(0x5a8), 422 REG16(0x5ac), 423 424 NOP(6), 425 LRI(1, 0), 426 REG(0x0c8), 427 428 END 429 }; 430 431 #define XE2_CTX_COMMON \ 432 NOP(1), /* [0x00] */ \ 433 LRI(15, POSTED), /* [0x01] */ \ 434 REG16(0x244), /* [0x02] CTXT_SR_CTL */ \ 435 REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \ 436 REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \ 437 REG(0x038), /* [0x08] RING_BUFFER_START */ \ 438 REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \ 439 REG(0x168), /* [0x0c] BB_ADDR_UDW */ \ 440 REG(0x140), /* [0x0e] BB_ADDR */ \ 441 REG(0x110), /* [0x10] BB_STATE */ \ 442 REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \ 443 REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \ 444 REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \ 445 REG(0x180), /* [0x18] CCID */ \ 446 REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \ 447 REG(0x120), /* [0x1c] PRT_BB_STATE */ \ 448 REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \ 449 \ 450 NOP(1), /* [0x20] */ \ 451 LRI(9, POSTED), /* [0x21] */ \ 452 REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \ 453 REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \ 454 REG(0x108), /* [0x26] INDIRECT_RING_STATE */ \ 455 REG16(0x284), /* [0x28] dummy reg */ \ 456 REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \ 457 REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \ 458 REG16(0x278), /* [0x2e] CS_CTX_ASID */ \ 459 REG16(0x274), /* [0x30] PTBP_UDW */ \ 460 REG16(0x270) /* [0x32] PTBP_LDW */ 461 462 static const u8 xe2_rcs_offsets[] = { 463 XE2_CTX_COMMON, 464 465 NOP(2), /* [0x34] */ 466 LRI(2, POSTED), /* [0x36] */ 467 REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */ 468 REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */ 469 470 NOP(6), /* [0x41] */ 471 LRI(1, 0), /* [0x47] */ 472 REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ 473 474 END 475 }; 476 477 static const u8 xe2_bcs_offsets[] = { 478 XE2_CTX_COMMON, 479 480 NOP(4 + 8 + 1), /* [0x34] */ 481 LRI(2, POSTED), /* [0x41] */ 482 REG16(0x200), /* [0x42] BCS_SWCTRL */ 483 REG16(0x204), /* [0x44] BLIT_CCTL */ 484 485 END 486 }; 487 488 static const u8 xe2_xcs_offsets[] = { 489 XE2_CTX_COMMON, 490 491 END 492 }; 493 494 #undef END 495 #undef REG16 496 #undef REG 497 #undef LRI 498 #undef NOP 499 500 static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) 501 { 502 if (class == XE_ENGINE_CLASS_RENDER) { 503 if (GRAPHICS_VER(xe) >= 20) 504 return xe2_rcs_offsets; 505 else if (GRAPHICS_VERx100(xe) >= 1270) 506 return mtl_rcs_offsets; 507 else if (GRAPHICS_VERx100(xe) >= 1255) 508 return dg2_rcs_offsets; 509 else if (GRAPHICS_VERx100(xe) >= 1250) 510 return xehp_rcs_offsets; 511 else 512 return gen12_rcs_offsets; 513 } else if (class == XE_ENGINE_CLASS_COPY) { 514 if (GRAPHICS_VER(xe) >= 20) 515 return xe2_bcs_offsets; 516 else 517 return gen12_xcs_offsets; 518 } else { 519 if (GRAPHICS_VER(xe) >= 20) 520 return xe2_xcs_offsets; 521 else if (GRAPHICS_VERx100(xe) >= 1255) 522 return dg2_xcs_offsets; 523 else 524 return gen12_xcs_offsets; 525 } 526 } 527 528 static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) 529 { 530 regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) | 531 _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | 532 CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; 533 534 /* TODO: Timestamp */ 535 } 536 537 static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) 538 { 539 struct xe_memirq *memirq = >_to_tile(hwe->gt)->sriov.vf.memirq; 540 struct xe_device *xe = gt_to_xe(hwe->gt); 541 542 if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe)) 543 return; 544 545 regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM | 546 MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT; 547 regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; 548 regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); 549 550 regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | 551 MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; 552 regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; 553 regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq); 554 regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; 555 regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq); 556 } 557 558 static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) 559 { 560 struct xe_device *xe = gt_to_xe(hwe->gt); 561 562 if (GRAPHICS_VERx100(xe) >= 1250) 563 return 0x70; 564 else 565 return 0x60; 566 } 567 568 static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) 569 { 570 int x; 571 572 x = lrc_ring_mi_mode(hwe); 573 regs[x + 1] &= ~STOP_RING; 574 regs[x + 1] |= STOP_RING << 16; 575 } 576 577 static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) 578 { 579 return 0; 580 } 581 582 u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) 583 { 584 return lrc->ring.size; 585 } 586 587 /* Make the magic macros work */ 588 #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset 589 590 #define LRC_SEQNO_PPHWSP_OFFSET 512 591 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) 592 #define LRC_PARALLEL_PPHWSP_OFFSET 2048 593 #define LRC_PPHWSP_SIZE SZ_4K 594 595 static size_t lrc_reg_size(struct xe_device *xe) 596 { 597 if (GRAPHICS_VERx100(xe) >= 1250) 598 return 96 * sizeof(u32); 599 else 600 return 80 * sizeof(u32); 601 } 602 603 size_t xe_lrc_skip_size(struct xe_device *xe) 604 { 605 return LRC_PPHWSP_SIZE + lrc_reg_size(xe); 606 } 607 608 static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) 609 { 610 /* The seqno is stored in the driver-defined portion of PPHWSP */ 611 return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET; 612 } 613 614 static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) 615 { 616 /* The start seqno is stored in the driver-defined portion of PPHWSP */ 617 return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; 618 } 619 620 static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) 621 { 622 /* The parallel is stored in the driver-defined portion of PPHWSP */ 623 return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; 624 } 625 626 static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc) 627 { 628 return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; 629 } 630 631 #define DECL_MAP_ADDR_HELPERS(elem) \ 632 static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ 633 { \ 634 struct iosys_map map = lrc->bo->vmap; \ 635 \ 636 xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \ 637 iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ 638 return map; \ 639 } \ 640 static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ 641 { \ 642 return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ 643 } \ 644 645 DECL_MAP_ADDR_HELPERS(ring) 646 DECL_MAP_ADDR_HELPERS(pphwsp) 647 DECL_MAP_ADDR_HELPERS(seqno) 648 DECL_MAP_ADDR_HELPERS(regs) 649 DECL_MAP_ADDR_HELPERS(start_seqno) 650 DECL_MAP_ADDR_HELPERS(parallel) 651 652 #undef DECL_MAP_ADDR_HELPERS 653 654 u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) 655 { 656 return __xe_lrc_pphwsp_ggtt_addr(lrc); 657 } 658 659 u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) 660 { 661 struct xe_device *xe = lrc_to_xe(lrc); 662 struct iosys_map map; 663 664 map = __xe_lrc_regs_map(lrc); 665 iosys_map_incr(&map, reg_nr * sizeof(u32)); 666 return xe_map_read32(xe, &map); 667 } 668 669 void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) 670 { 671 struct xe_device *xe = lrc_to_xe(lrc); 672 struct iosys_map map; 673 674 map = __xe_lrc_regs_map(lrc); 675 iosys_map_incr(&map, reg_nr * sizeof(u32)); 676 xe_map_write32(xe, &map, val); 677 } 678 679 static void *empty_lrc_data(struct xe_hw_engine *hwe) 680 { 681 struct xe_device *xe = gt_to_xe(hwe->gt); 682 void *data; 683 u32 *regs; 684 685 data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL); 686 if (!data) 687 return NULL; 688 689 /* 1st page: Per-Process of HW status Page */ 690 regs = data + LRC_PPHWSP_SIZE; 691 set_offsets(regs, reg_offsets(xe, hwe->class), hwe); 692 set_context_control(regs, hwe); 693 set_memory_based_intr(regs, hwe); 694 reset_stop_ring(regs, hwe); 695 696 return data; 697 } 698 699 static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) 700 { 701 u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); 702 703 xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); 704 xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); 705 } 706 707 #define PVC_CTX_ASID (0x2e + 1) 708 #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) 709 #define ACC_GRANULARITY_S 20 710 #define ACC_NOTIFY_S 16 711 712 int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, 713 struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size) 714 { 715 struct xe_gt *gt = hwe->gt; 716 struct xe_tile *tile = gt_to_tile(gt); 717 struct xe_device *xe = gt_to_xe(gt); 718 struct iosys_map map; 719 void *init_data = NULL; 720 u32 arb_enable; 721 int err; 722 723 lrc->flags = 0; 724 725 /* 726 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address 727 * via VM bind calls. 728 */ 729 lrc->bo = xe_bo_create_pin_map(xe, tile, vm, 730 ring_size + xe_lrc_size(xe, hwe->class), 731 ttm_bo_type_kernel, 732 XE_BO_CREATE_VRAM_IF_DGFX(tile) | 733 XE_BO_CREATE_GGTT_BIT); 734 if (IS_ERR(lrc->bo)) 735 return PTR_ERR(lrc->bo); 736 737 lrc->tile = gt_to_tile(hwe->gt); 738 lrc->ring.size = ring_size; 739 lrc->ring.tail = 0; 740 741 xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, 742 hwe->fence_irq, hwe->name); 743 744 if (!gt->default_lrc[hwe->class]) { 745 init_data = empty_lrc_data(hwe); 746 if (!init_data) { 747 err = -ENOMEM; 748 goto err_lrc_finish; 749 } 750 } 751 752 /* 753 * Init Per-Process of HW status Page, LRC / context state to known 754 * values 755 */ 756 map = __xe_lrc_pphwsp_map(lrc); 757 if (!init_data) { 758 xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ 759 xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, 760 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, 761 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE); 762 } else { 763 xe_map_memcpy_to(xe, &map, 0, init_data, 764 xe_lrc_size(xe, hwe->class)); 765 kfree(init_data); 766 } 767 768 if (vm) { 769 xe_lrc_set_ppgtt(lrc, vm); 770 771 if (vm->xef) 772 xe_drm_client_add_bo(vm->xef->client, lrc->bo); 773 } 774 775 xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); 776 xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); 777 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); 778 xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, 779 RING_CTL_SIZE(lrc->ring.size) | RING_VALID); 780 if (xe->info.has_asid && vm) 781 xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, 782 (q->usm.acc_granularity << 783 ACC_GRANULARITY_S) | vm->usm.asid); 784 if (xe->info.has_usm && vm) 785 xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, 786 (q->usm.acc_notify << ACC_NOTIFY_S) | 787 q->usm.acc_trigger); 788 789 lrc->desc = LRC_VALID; 790 lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; 791 /* TODO: Priority */ 792 793 /* While this appears to have something about privileged batches or 794 * some such, it really just means PPGTT mode. 795 */ 796 if (vm) 797 lrc->desc |= LRC_PRIVILEGE; 798 799 if (GRAPHICS_VERx100(xe) < 1250) { 800 lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT; 801 lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT; 802 } 803 804 arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; 805 xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable)); 806 807 map = __xe_lrc_seqno_map(lrc); 808 xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); 809 810 map = __xe_lrc_start_seqno_map(lrc); 811 xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); 812 813 return 0; 814 815 err_lrc_finish: 816 xe_lrc_finish(lrc); 817 return err; 818 } 819 820 void xe_lrc_finish(struct xe_lrc *lrc) 821 { 822 xe_hw_fence_ctx_finish(&lrc->fence_ctx); 823 xe_bo_lock(lrc->bo, false); 824 xe_bo_unpin(lrc->bo); 825 xe_bo_unlock(lrc->bo); 826 xe_bo_put(lrc->bo); 827 } 828 829 void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) 830 { 831 xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); 832 } 833 834 u32 xe_lrc_ring_head(struct xe_lrc *lrc) 835 { 836 return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; 837 } 838 839 u32 xe_lrc_ring_space(struct xe_lrc *lrc) 840 { 841 const u32 head = xe_lrc_ring_head(lrc); 842 const u32 tail = lrc->ring.tail; 843 const u32 size = lrc->ring.size; 844 845 return ((head - tail - 1) & (size - 1)) + 1; 846 } 847 848 static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, 849 const void *data, size_t size) 850 { 851 struct xe_device *xe = lrc_to_xe(lrc); 852 853 iosys_map_incr(&ring, lrc->ring.tail); 854 xe_map_memcpy_to(xe, &ring, 0, data, size); 855 lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); 856 } 857 858 void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) 859 { 860 struct xe_device *xe = lrc_to_xe(lrc); 861 struct iosys_map ring; 862 u32 rhs; 863 size_t aligned_size; 864 865 xe_assert(xe, IS_ALIGNED(size, 4)); 866 aligned_size = ALIGN(size, 8); 867 868 ring = __xe_lrc_ring_map(lrc); 869 870 xe_assert(xe, lrc->ring.tail < lrc->ring.size); 871 rhs = lrc->ring.size - lrc->ring.tail; 872 if (size > rhs) { 873 __xe_lrc_write_ring(lrc, ring, data, rhs); 874 __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); 875 } else { 876 __xe_lrc_write_ring(lrc, ring, data, size); 877 } 878 879 if (aligned_size > size) { 880 u32 noop = MI_NOOP; 881 882 __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); 883 } 884 } 885 886 u64 xe_lrc_descriptor(struct xe_lrc *lrc) 887 { 888 return lrc->desc | xe_lrc_ggtt_addr(lrc); 889 } 890 891 u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) 892 { 893 return __xe_lrc_seqno_ggtt_addr(lrc); 894 } 895 896 struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc) 897 { 898 return &xe_hw_fence_create(&lrc->fence_ctx, 899 __xe_lrc_seqno_map(lrc))->dma; 900 } 901 902 s32 xe_lrc_seqno(struct xe_lrc *lrc) 903 { 904 struct iosys_map map = __xe_lrc_seqno_map(lrc); 905 906 return xe_map_read32(lrc_to_xe(lrc), &map); 907 } 908 909 s32 xe_lrc_start_seqno(struct xe_lrc *lrc) 910 { 911 struct iosys_map map = __xe_lrc_start_seqno_map(lrc); 912 913 return xe_map_read32(lrc_to_xe(lrc), &map); 914 } 915 916 u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) 917 { 918 return __xe_lrc_start_seqno_ggtt_addr(lrc); 919 } 920 921 u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) 922 { 923 return __xe_lrc_parallel_ggtt_addr(lrc); 924 } 925 926 struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) 927 { 928 return __xe_lrc_parallel_map(lrc); 929 } 930 931 static int instr_dw(u32 cmd_header) 932 { 933 /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ 934 if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) == 935 GFXPIPE_SINGLE_DW_CMD(0, 0)) 936 return 1; 937 938 /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */ 939 if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST) 940 return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2; 941 942 /* Most instructions have the # of dwords (minus 2) in 7:0 */ 943 return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; 944 } 945 946 static int dump_mi_command(struct drm_printer *p, 947 struct xe_gt *gt, 948 u32 *dw, 949 int remaining_dw) 950 { 951 u32 inst_header = *dw; 952 u32 numdw = instr_dw(inst_header); 953 u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header); 954 int num_noop; 955 956 /* First check for commands that don't have/use a '# DW' field */ 957 switch (inst_header & MI_OPCODE) { 958 case MI_NOOP: 959 num_noop = 1; 960 while (num_noop < remaining_dw && 961 (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) 962 num_noop++; 963 drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop); 964 return num_noop; 965 966 case MI_TOPOLOGY_FILTER: 967 drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header); 968 return 1; 969 970 case MI_BATCH_BUFFER_END: 971 drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header); 972 /* Return 'remaining_dw' to consume the rest of the LRC */ 973 return remaining_dw; 974 } 975 976 /* 977 * Any remaining commands include a # of dwords. We should make sure 978 * it doesn't exceed the remaining size of the LRC. 979 */ 980 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 981 numdw = remaining_dw; 982 983 switch (inst_header & MI_OPCODE) { 984 case MI_LOAD_REGISTER_IMM: 985 drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", 986 inst_header, (numdw - 1) / 2); 987 for (int i = 1; i < numdw; i += 2) 988 drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); 989 return numdw; 990 991 case MI_LOAD_REGISTER_MEM & MI_OPCODE: 992 drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", 993 inst_header, 994 dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "", 995 dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); 996 if (numdw == 4) 997 drm_printf(p, " - %#6x = %#010llx\n", 998 dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); 999 else 1000 drm_printf(p, " - %*ph (%s)\n", 1001 (int)sizeof(u32) * (numdw - 1), dw + 1, 1002 numdw < 4 ? "truncated" : "malformed"); 1003 return numdw; 1004 1005 case MI_FORCE_WAKEUP: 1006 drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); 1007 return numdw; 1008 1009 default: 1010 drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", 1011 inst_header, opcode, numdw); 1012 return numdw; 1013 } 1014 } 1015 1016 static int dump_gfxpipe_command(struct drm_printer *p, 1017 struct xe_gt *gt, 1018 u32 *dw, 1019 int remaining_dw) 1020 { 1021 u32 numdw = instr_dw(*dw); 1022 u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); 1023 u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); 1024 u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); 1025 1026 /* 1027 * Make sure we haven't mis-parsed a number of dwords that exceeds the 1028 * remaining size of the LRC. 1029 */ 1030 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 1031 numdw = remaining_dw; 1032 1033 switch (*dw & GFXPIPE_MATCH_MASK) { 1034 #define MATCH(cmd) \ 1035 case cmd: \ 1036 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 1037 return numdw 1038 #define MATCH3D(cmd) \ 1039 case CMD_##cmd: \ 1040 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 1041 return numdw 1042 1043 MATCH(STATE_BASE_ADDRESS); 1044 MATCH(STATE_SIP); 1045 MATCH(GPGPU_CSR_BASE_ADDRESS); 1046 MATCH(STATE_COMPUTE_MODE); 1047 MATCH3D(3DSTATE_BTD); 1048 1049 MATCH3D(3DSTATE_VF_STATISTICS); 1050 1051 MATCH(PIPELINE_SELECT); 1052 1053 MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); 1054 MATCH3D(3DSTATE_CLEAR_PARAMS); 1055 MATCH3D(3DSTATE_DEPTH_BUFFER); 1056 MATCH3D(3DSTATE_STENCIL_BUFFER); 1057 MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); 1058 MATCH3D(3DSTATE_VERTEX_BUFFERS); 1059 MATCH3D(3DSTATE_VERTEX_ELEMENTS); 1060 MATCH3D(3DSTATE_INDEX_BUFFER); 1061 MATCH3D(3DSTATE_VF); 1062 MATCH3D(3DSTATE_MULTISAMPLE); 1063 MATCH3D(3DSTATE_CC_STATE_POINTERS); 1064 MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); 1065 MATCH3D(3DSTATE_VS); 1066 MATCH3D(3DSTATE_GS); 1067 MATCH3D(3DSTATE_CLIP); 1068 MATCH3D(3DSTATE_SF); 1069 MATCH3D(3DSTATE_WM); 1070 MATCH3D(3DSTATE_CONSTANT_VS); 1071 MATCH3D(3DSTATE_CONSTANT_GS); 1072 MATCH3D(3DSTATE_SAMPLE_MASK); 1073 MATCH3D(3DSTATE_CONSTANT_HS); 1074 MATCH3D(3DSTATE_CONSTANT_DS); 1075 MATCH3D(3DSTATE_HS); 1076 MATCH3D(3DSTATE_TE); 1077 MATCH3D(3DSTATE_DS); 1078 MATCH3D(3DSTATE_STREAMOUT); 1079 MATCH3D(3DSTATE_SBE); 1080 MATCH3D(3DSTATE_PS); 1081 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); 1082 MATCH3D(3DSTATE_CPS_POINTERS); 1083 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); 1084 MATCH3D(3DSTATE_BLEND_STATE_POINTERS); 1085 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); 1086 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); 1087 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); 1088 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); 1089 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); 1090 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); 1091 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); 1092 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); 1093 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); 1094 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); 1095 MATCH3D(3DSTATE_VF_INSTANCING); 1096 MATCH3D(3DSTATE_VF_SGVS); 1097 MATCH3D(3DSTATE_VF_TOPOLOGY); 1098 MATCH3D(3DSTATE_WM_CHROMAKEY); 1099 MATCH3D(3DSTATE_PS_BLEND); 1100 MATCH3D(3DSTATE_WM_DEPTH_STENCIL); 1101 MATCH3D(3DSTATE_PS_EXTRA); 1102 MATCH3D(3DSTATE_RASTER); 1103 MATCH3D(3DSTATE_SBE_SWIZ); 1104 MATCH3D(3DSTATE_WM_HZ_OP); 1105 MATCH3D(3DSTATE_VF_COMPONENT_PACKING); 1106 MATCH3D(3DSTATE_VF_SGVS_2); 1107 MATCH3D(3DSTATE_VFG); 1108 MATCH3D(3DSTATE_URB_ALLOC_VS); 1109 MATCH3D(3DSTATE_URB_ALLOC_HS); 1110 MATCH3D(3DSTATE_URB_ALLOC_DS); 1111 MATCH3D(3DSTATE_URB_ALLOC_GS); 1112 MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); 1113 MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); 1114 MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); 1115 MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); 1116 MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); 1117 MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); 1118 MATCH3D(3DSTATE_AMFS); 1119 MATCH3D(3DSTATE_DEPTH_BOUNDS); 1120 MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); 1121 MATCH3D(3DSTATE_CONSTANT_TS_POINTER); 1122 MATCH3D(3DSTATE_MESH_CONTROL); 1123 MATCH3D(3DSTATE_MESH_DISTRIB); 1124 MATCH3D(3DSTATE_TASK_REDISTRIB); 1125 MATCH3D(3DSTATE_MESH_SHADER); 1126 MATCH3D(3DSTATE_MESH_SHADER_DATA); 1127 MATCH3D(3DSTATE_TASK_CONTROL); 1128 MATCH3D(3DSTATE_TASK_SHADER); 1129 MATCH3D(3DSTATE_TASK_SHADER_DATA); 1130 MATCH3D(3DSTATE_URB_ALLOC_MESH); 1131 MATCH3D(3DSTATE_URB_ALLOC_TASK); 1132 MATCH3D(3DSTATE_CLIP_MESH); 1133 MATCH3D(3DSTATE_SBE_MESH); 1134 MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); 1135 1136 MATCH3D(3DSTATE_DRAWING_RECTANGLE); 1137 MATCH3D(3DSTATE_CHROMA_KEY); 1138 MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); 1139 MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); 1140 MATCH3D(3DSTATE_LINE_STIPPLE); 1141 MATCH3D(3DSTATE_AA_LINE_PARAMETERS); 1142 MATCH3D(3DSTATE_MONOFILTER_SIZE); 1143 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); 1144 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); 1145 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); 1146 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); 1147 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); 1148 MATCH3D(3DSTATE_SO_DECL_LIST); 1149 MATCH3D(3DSTATE_SO_BUFFER); 1150 MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); 1151 MATCH3D(3DSTATE_SAMPLE_PATTERN); 1152 MATCH3D(3DSTATE_3D_MODE); 1153 MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); 1154 MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); 1155 MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); 1156 1157 default: 1158 drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", 1159 *dw, pipeline, opcode, subopcode, numdw); 1160 return numdw; 1161 } 1162 } 1163 1164 void xe_lrc_dump_default(struct drm_printer *p, 1165 struct xe_gt *gt, 1166 enum xe_engine_class hwe_class) 1167 { 1168 u32 *dw; 1169 int remaining_dw, num_dw; 1170 1171 if (!gt->default_lrc[hwe_class]) { 1172 drm_printf(p, "No default LRC for class %d\n", hwe_class); 1173 return; 1174 } 1175 1176 /* 1177 * Skip the beginning of the LRC since it contains the per-process 1178 * hardware status page. 1179 */ 1180 dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; 1181 remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4; 1182 1183 while (remaining_dw > 0) { 1184 if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { 1185 num_dw = dump_mi_command(p, gt, dw, remaining_dw); 1186 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { 1187 num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); 1188 } else { 1189 num_dw = min(instr_dw(*dw), remaining_dw); 1190 drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", 1191 *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), 1192 num_dw); 1193 } 1194 1195 dw += num_dw; 1196 remaining_dw -= num_dw; 1197 } 1198 } 1199 1200 struct instr_state { 1201 u32 instr; 1202 u16 num_dw; 1203 }; 1204 1205 static const struct instr_state xe_hpg_svg_state[] = { 1206 { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, 1207 { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, 1208 { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, 1209 { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, 1210 { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, 1211 { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, 1212 { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, 1213 { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, 1214 { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, 1215 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, 1216 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, 1217 { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, 1218 { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, 1219 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, 1220 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, 1221 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, 1222 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, 1223 { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, 1224 { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, 1225 { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 }, 1226 { .instr = CMD_3DSTATE_SF, .num_dw = 4 }, 1227 { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, 1228 { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, 1229 { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, 1230 { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, 1231 { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, 1232 { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, 1233 { .instr = CMD_3DSTATE_HS, .num_dw = 9 }, 1234 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, 1235 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, 1236 { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, 1237 { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, 1238 { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, 1239 { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, 1240 { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, 1241 { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, 1242 { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, 1243 { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, 1244 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, 1245 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, 1246 { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, 1247 { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, 1248 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, 1249 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 }, 1250 { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 }, 1251 { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 }, 1252 { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 }, 1253 { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 }, 1254 { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 }, 1255 { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, 1256 }; 1257 1258 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) 1259 { 1260 struct xe_gt *gt = q->hwe->gt; 1261 struct xe_device *xe = gt_to_xe(gt); 1262 const struct instr_state *state_table = NULL; 1263 int state_table_size = 0; 1264 1265 /* 1266 * At the moment we only need to emit non-register state for the RCS 1267 * engine. 1268 */ 1269 if (q->hwe->class != XE_ENGINE_CLASS_RENDER) 1270 return; 1271 1272 switch (GRAPHICS_VERx100(xe)) { 1273 case 1255: 1274 case 1270 ... 2004: 1275 state_table = xe_hpg_svg_state; 1276 state_table_size = ARRAY_SIZE(xe_hpg_svg_state); 1277 break; 1278 default: 1279 xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", 1280 GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); 1281 return; 1282 } 1283 1284 for (int i = 0; i < state_table_size; i++) { 1285 u32 instr = state_table[i].instr; 1286 u16 num_dw = state_table[i].num_dw; 1287 bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW); 1288 1289 xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE); 1290 xe_gt_assert(gt, num_dw != 0); 1291 xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); 1292 1293 /* 1294 * Xe2's SVG context is the same as the one on DG2 / MTL 1295 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has 1296 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined). 1297 * Just make the replacement here rather than defining a 1298 * whole separate table for the single trivial change. 1299 */ 1300 if (GRAPHICS_VER(xe) >= 20 && 1301 instr == CMD_3DSTATE_DRAWING_RECTANGLE) 1302 instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; 1303 1304 bb->cs[bb->len] = instr; 1305 if (!is_single_dw) 1306 bb->cs[bb->len] |= (num_dw - 2); 1307 1308 bb->len += num_dw; 1309 } 1310 } 1311