// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gpu_commands.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"

#define LRC_VALID				(1 << 0)
#define LRC_PRIVILEGE				(1 << 8)
#define LRC_ADDRESSING_MODE_SHIFT		3
#define LRC_LEGACY_64B_CONTEXT			3

#define ENGINE_CLASS_SHIFT			61
#define ENGINE_INSTANCE_SHIFT			48

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			return 4 * SZ_4K;
		else
			return 14 * SZ_4K;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			return 3 * SZ_4K;
		else
			return 14 * SZ_4K;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		return 2 * SZ_4K;
	}
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are two steps: decoding commands and decoding
 * addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating a MI_LOAD_REGISTER_IMM command, allow setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to set values for in the
 *        case of MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for offsets
 * bigger than that. Those macros already set all the bits documented below
 * correctly:
 *
 * [7]: set when the register offset needs more than 6 bits; additional bytes
 *      follow, carrying the lower bits
 * [6:0]: register offset, without considering the engine base
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
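
/*
 * Worked example (added for illustration, not from the original comment):
 * REG16(0x2b4) expands to the two bytes 0x81, 0x2d. The decode loop below
 * then computes offset = (0x01 << 7) | 0x2d = 0xad and stores
 * base + (0xad << 2), i.e. base + 0x2b4, into the register-offset slot of
 * the LRI pair.
 */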
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

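/*
 * Added summary: each table below is a stream of NOP()/LRI() command bytes
 * followed by REG()/REG16() offset bytes, terminated by a 0 byte. The DG2
 * variants differ from the gen12 ones mainly in the two extra 0x120/0x124
 * entries and the matching LRI counts (15 vs 13).
 */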
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
				    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K

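/*
 * Overall layout of the LRC BO, as implied by the offset helpers below
 * (added summary):
 *
 *   [0, ring.size)                     ring buffer
 *   [ring.size, +LRC_PPHWSP_SIZE)      PPHWSP; seqno, start seqno and the
 *                                      parallel scratch area live in its
 *                                      driver-defined portion
 *   [ring.size + LRC_PPHWSP_SIZE, ..)  context register state
 */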

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel area is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	void *data;
	u32 *regs;

	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-process HW status page (PPHWSP) */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

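/*
 * Added summary: xe_lrc_init() allocates and perma-pins the backing BO,
 * initializes the HW fence context, seeds the context image (either from the
 * GT's default LRC for this engine class or from freshly built
 * empty_lrc_data()), and then programs the ring registers and the context
 * descriptor.
 */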
int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	/*
	 * FIXME: Perma-pinning the LRC as we don't yet support moving the
	 * GGTT address via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
				       ring_size + xe_lrc_size(xe, hwe->class),
				       ttm_bo_type_kernel,
				       XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				       XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init the per-process HW status page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

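	/*
	 * Added note: program the ring state into the context image; the
	 * engine reads these registers back on context restore.
	 */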
	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
{
	return &xe_hw_fence_create(&lrc->fence_ctx,
				   __xe_lrc_seqno_map(lrc))->dma;
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

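/*
 * Added note: for most instructions the header encodes (total dwords - 2) in
 * bits 7:0, so e.g. a length field of 2 denotes a 4-dword instruction; the
 * two exceptions are handled explicitly below.
 */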
static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords. We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

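/*
 * Added note: the MATCH/MATCH3D macros in the switch below expand to case
 * labels that print the instruction mnemonic and consume its dword count, so
 * the table stays a plain list of known GFXPIPE opcodes.
 */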
static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * At the moment we only need to emit non-register state for the RCS
	 * engine.
	 */
	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	switch (GRAPHICS_VERx100(xe)) {
	case 1255:
	case 1270 ... 2004:
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
		break;
	default:
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}