// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gpu_commands.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"

#define LRC_VALID			(1 << 0)
#define LRC_PRIVILEGE			(1 << 8)
#define LRC_ADDRESSING_MODE_SHIFT	3
#define LRC_LEGACY_64B_CONTEXT		3

#define ENGINE_CLASS_SHIFT		61
#define ENGINE_INSTANCE_SHIFT		48

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			return 4 * SZ_4K;
		else
			return 14 * SZ_4K;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			return 3 * SZ_4K;
		else
			return 14 * SZ_4K;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		return 2 * SZ_4K;
	}
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - number of NOPs are set in lower bits
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, use additional bytes, to
 *      follow, for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
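 *
 * For example, the sequence LRI(2, POSTED), REG(0x034), REG16(0x2b4) encodes
 * to the bytes 0x42, 0x0d, 0x81, 0x2d: 0x42 emits an MI_LOAD_REGISTER_IMM of
 * two registers with MI_LRI_FORCE_POSTED set, 0x0d carries 0x034 >> 2, and
 * the pair 0x81, 0x2d carries 0x2b4 split as ((0x2b4 >> 9) | BIT(7)) followed
 * by ((0x2b4 >> 2) & 0x7f).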
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
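	/* [0x00] - [0x33]: common Xe2 engine context layout */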
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	END
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	END
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
				    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
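/*
 * The seqno lives at byte offset 512 of the PPHWSP, the start seqno 8 bytes
 * after it, and the parallel scratch area at offset 2048; the
 * __xe_lrc_*_offset() helpers below resolve these relative to the PPHWSP
 * base returned by xe_lrc_pphwsp_offset().
 */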
#define LRC_PPHWSP_SIZE SZ_4K

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	void *data;
	u32 *regs;

	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process of HW status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
				       ring_size + xe_lrc_size(xe, hwe->class),
				       ttm_bo_type_kernel,
				       XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				       XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
{
	return &xe_hw_fence_create(&lrc->fence_ctx,
				   __xe_lrc_seqno_map(lrc))->dma;
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords. We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * At the moment we only need to emit non-register state for the RCS
	 * engine.
	 */
	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	switch (GRAPHICS_VERx100(xe)) {
	case 1255:
	case 1270 ... 2004:
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
		break;
	default:
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}