// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gpu_commands.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"

#define LRC_VALID				(1 << 0)
#define LRC_PRIVILEGE				(1 << 8)
#define LRC_ADDRESSING_MODE_SHIFT		3
#define LRC_LEGACY_64B_CONTEXT			3

#define ENGINE_CLASS_SHIFT			61
#define ENGINE_INSTANCE_SHIFT			48

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			return 4 * SZ_4K;
		else
			return 14 * SZ_4K;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			return 3 * SZ_4K;
		else
			return 14 * SZ_4K;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		return 2 * SZ_4K;
	}
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - number of NOPs are set in lower bits
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, use additional bytes, to
 *      follow, for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
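/*
 * Worked example (illustrative only, using the helper macros defined just
 * below): the table fragment
 *
 *	NOP(1), LRI(2, POSTED), REG(0x034), REG16(0x2b4), 0
 *
 * encodes to the bytes { 0x81, 0x42, 0x0d, 0x81, 0x2d, 0x00 }. Decoding
 * skips one dword, emits an MI_LOAD_REGISTER_IMM header for two registers
 * with MI_LRI_FORCE_POSTED set, then fills the register slots for
 * <mmio_base> + 0x034 and <mmio_base> + 0x2b4 (the two-byte REG16 form
 * carries the high offset bits in the first byte, flagged by BIT(7)); the
 * trailing zero byte terminates the table.
 */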
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */
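/*
 * Note: the bracketed [0xNN] annotations above and in the tables below are
 * the dword offsets of each entry within the decoded context image, which
 * is also how the NOP() padding counts between the LRI groups are derived.
 */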
static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K
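/*
 * Illustrative sketch of the LRC BO layout implied by the offsets above and
 * the helpers below:
 *
 *	[0 .. ring.size)                      ring buffer
 *	[ring.size .. ring.size + 4K)         PPHWSP
 *	    +LRC_SEQNO_PPHWSP_OFFSET              seqno
 *	    +LRC_START_SEQNO_PPHWSP_OFFSET        start seqno
 *	    +LRC_PARALLEL_PPHWSP_OFFSET           parallel scratch
 *	[ring.size + 4K .. end)               context register state
 */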
static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	void *data;
	u32 *regs;

	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process of HW status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
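/*
 * Rough sketch of the context descriptor assembled below and returned by
 * xe_lrc_descriptor(): bit 0 marks the context valid, bits [4:3] select the
 * legacy 64b addressing mode, bit 8 flags PPGTT mode, the PPHWSP GGTT
 * address is OR'ed into the middle, and on pre-12.50 platforms the engine
 * instance and class land at bits 48 and 61 respectively.
 */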
int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
				       ring_size + xe_lrc_size(xe, hwe->class),
				       ttm_bo_type_kernel,
				       XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				       XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
784 */ 785 if (vm) 786 lrc->desc |= LRC_PRIVILEGE; 787 788 if (GRAPHICS_VERx100(xe) < 1250) { 789 lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT; 790 lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT; 791 } 792 793 arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; 794 xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable)); 795 796 map = __xe_lrc_seqno_map(lrc); 797 xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); 798 799 map = __xe_lrc_start_seqno_map(lrc); 800 xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); 801 802 return 0; 803 804 err_lrc_finish: 805 xe_lrc_finish(lrc); 806 return err; 807 } 808 809 void xe_lrc_finish(struct xe_lrc *lrc) 810 { 811 xe_hw_fence_ctx_finish(&lrc->fence_ctx); 812 xe_bo_lock(lrc->bo, false); 813 xe_bo_unpin(lrc->bo); 814 xe_bo_unlock(lrc->bo); 815 xe_bo_put(lrc->bo); 816 } 817 818 void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) 819 { 820 xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); 821 } 822 823 u32 xe_lrc_ring_head(struct xe_lrc *lrc) 824 { 825 return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; 826 } 827 828 u32 xe_lrc_ring_space(struct xe_lrc *lrc) 829 { 830 const u32 head = xe_lrc_ring_head(lrc); 831 const u32 tail = lrc->ring.tail; 832 const u32 size = lrc->ring.size; 833 834 return ((head - tail - 1) & (size - 1)) + 1; 835 } 836 837 static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, 838 const void *data, size_t size) 839 { 840 struct xe_device *xe = lrc_to_xe(lrc); 841 842 iosys_map_incr(&ring, lrc->ring.tail); 843 xe_map_memcpy_to(xe, &ring, 0, data, size); 844 lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); 845 } 846 847 void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) 848 { 849 struct xe_device *xe = lrc_to_xe(lrc); 850 struct iosys_map ring; 851 u32 rhs; 852 size_t aligned_size; 853 854 xe_assert(xe, IS_ALIGNED(size, 4)); 855 aligned_size = ALIGN(size, 8); 856 857 ring = __xe_lrc_ring_map(lrc); 858 859 xe_assert(xe, lrc->ring.tail < lrc->ring.size); 860 rhs = lrc->ring.size - lrc->ring.tail; 861 if (size > rhs) { 862 __xe_lrc_write_ring(lrc, ring, data, rhs); 863 __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); 864 } else { 865 __xe_lrc_write_ring(lrc, ring, data, size); 866 } 867 868 if (aligned_size > size) { 869 u32 noop = MI_NOOP; 870 871 __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); 872 } 873 } 874 875 u64 xe_lrc_descriptor(struct xe_lrc *lrc) 876 { 877 return lrc->desc | xe_lrc_ggtt_addr(lrc); 878 } 879 880 u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) 881 { 882 return __xe_lrc_seqno_ggtt_addr(lrc); 883 } 884 885 struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc) 886 { 887 return &xe_hw_fence_create(&lrc->fence_ctx, 888 __xe_lrc_seqno_map(lrc))->dma; 889 } 890 891 s32 xe_lrc_seqno(struct xe_lrc *lrc) 892 { 893 struct iosys_map map = __xe_lrc_seqno_map(lrc); 894 895 return xe_map_read32(lrc_to_xe(lrc), &map); 896 } 897 898 s32 xe_lrc_start_seqno(struct xe_lrc *lrc) 899 { 900 struct iosys_map map = __xe_lrc_start_seqno_map(lrc); 901 902 return xe_map_read32(lrc_to_xe(lrc), &map); 903 } 904 905 u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) 906 { 907 return __xe_lrc_start_seqno_ggtt_addr(lrc); 908 } 909 910 u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) 911 { 912 return __xe_lrc_parallel_ggtt_addr(lrc); 913 } 914 915 struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) 916 { 917 return __xe_lrc_parallel_map(lrc); 918 } 919 920 static int instr_dw(u32 cmd_header) 921 { 922 /* 
static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * At the moment we only need to emit non-register state for the RCS
	 * engine.
	 */
	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	switch (GRAPHICS_VERx100(xe)) {
	case 1255:
	case 1270 ... 2004:
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
		break;
	default:
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
1288 */ 1289 if (GRAPHICS_VER(xe) >= 20 && 1290 instr == CMD_3DSTATE_DRAWING_RECTANGLE) 1291 instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; 1292 1293 bb->cs[bb->len] = instr; 1294 if (!is_single_dw) 1295 bb->cs[bb->len] |= (num_dw - 2); 1296 1297 bb->len += num_dw; 1298 } 1299 } 1300