// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gpu_commands.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "regs/xe_regs.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_vm.h"

#define CTX_VALID                       (1 << 0)
#define CTX_PRIVILEGE                   (1 << 8)
#define CTX_ADDRESSING_MODE_SHIFT       3
#define LEGACY_64B_CONTEXT              3

#define ENGINE_CLASS_SHIFT              61
#define ENGINE_INSTANCE_SHIFT           48

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
        return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
{
        switch (class) {
        case XE_ENGINE_CLASS_RENDER:
                if (GRAPHICS_VER(xe) >= 20)
                        return 4 * SZ_4K;
                else
                        return 14 * SZ_4K;
        case XE_ENGINE_CLASS_COMPUTE:
                /* 14 pages since graphics_ver == 11 */
                if (GRAPHICS_VER(xe) >= 20)
                        return 3 * SZ_4K;
                else
                        return 14 * SZ_4K;
        default:
                WARN(1, "Unknown engine class: %d", class);
                fallthrough;
        case XE_ENGINE_CLASS_COPY:
        case XE_ENGINE_CLASS_VIDEO_DECODE:
        case XE_ENGINE_CLASS_VIDEO_ENHANCE:
        case XE_ENGINE_CLASS_OTHER:
                return 2 * SZ_4K;
        }
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, additional bytes follow
 *      for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
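/*
 * Worked example, added for illustration only (the byte values follow from
 * the NOP/LRI/REG/REG16 macros defined below): the opening of
 * gen12_xcs_offsets decodes as
 *
 *   NOP(1)          -> 0x81: bit 7 set, skip one dword in @regs
 *   LRI(13, POSTED) -> 0x4d: MI_LOAD_REGISTER_IMM of 13 registers with
 *                      MI_LRI_FORCE_POSTED set
 *   REG16(0x244)    -> 0x81, 0x11: two bytes accumulate offset 0x91, so the
 *                      register address is mmio_base + (0x91 << 2) = base + 0x244
 *   REG(0x034)      -> 0x0d: one byte, register address base + (0x0d << 2)
 */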
static void set_offsets(u32 *regs,
                        const u8 *data,
                        const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
                           BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
        (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
        (((x) >> 2) & 0x7f)
#define END 0
{
        const u32 base = hwe->mmio_base;

        while (*data) {
                u8 count, flags;

                if (*data & BIT(7)) { /* skip */
                        count = *data++ & ~BIT(7);
                        regs += count;
                        continue;
                }

                count = *data & 0x3f;
                flags = *data >> 6;
                data++;

                *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
                if (flags & POSTED)
                        *regs |= MI_LRI_FORCE_POSTED;
                *regs |= MI_LRI_LRM_CS_MMIO;
                regs++;

                xe_gt_assert(hwe->gt, count);
                do {
                        u32 offset = 0;
                        u8 v;

                        do {
                                v = *data++;
                                offset <<= 7;
                                offset |= v & ~BIT(7);
                        } while (v & BIT(7));

                        regs[0] = base + (offset << 2);
                        regs += 2;
                } while (--count);
        }

        *regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        END
};

static const u8 dg2_xcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),
        REG(0x120),
        REG(0x124),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        END
};

static const u8 gen12_rcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(3, POSTED),
        REG(0x1b0),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),
        NOP(3 + 9 + 1),

        LRI(51, POSTED),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG(0x028),
        REG(0x09c),
        REG(0x0c0),
        REG(0x178),
        REG(0x17c),
        REG16(0x358),
        REG(0x170),
        REG(0x150),
        REG(0x154),
        REG(0x158),
        REG16(0x41c),
        REG16(0x600),
        REG16(0x604),
        REG16(0x608),
        REG16(0x60c),
        REG16(0x610),
        REG16(0x614),
        REG16(0x618),
        REG16(0x61c),
        REG16(0x620),
        REG16(0x624),
        REG16(0x628),
        REG16(0x62c),
        REG16(0x630),
        REG16(0x634),
        REG16(0x638),
        REG16(0x63c),
        REG16(0x640),
        REG16(0x644),
        REG16(0x648),
        REG16(0x64c),
        REG16(0x650),
        REG16(0x654),
        REG16(0x658),
        REG16(0x65c),
        REG16(0x660),
        REG16(0x664),
        REG16(0x668),
        REG16(0x66c),
        REG16(0x670),
        REG16(0x674),
        REG16(0x678),
        REG16(0x67c),
        REG(0x068),
        REG(0x084),
        NOP(1),

        END
};

static const u8 xehp_rcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(3, POSTED),
        REG(0x1b0),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),

        END
};

static const u8 dg2_rcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),
        REG(0x120),
        REG(0x124),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(3, POSTED),
        REG(0x1b0),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),

        END
};

static const u8 mtl_rcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),
        REG(0x120),
        REG(0x124),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(2),
        LRI(2, POSTED),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),

        END
};

#define XE2_CTX_COMMON \
        NOP(1),                 /* [0x00] */ \
        LRI(15, POSTED),        /* [0x01] */ \
        REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
        REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
        REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
        REG(0x038),             /* [0x08] RING_BUFFER_START */ \
        REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
        REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
        REG(0x140),             /* [0x0e] BB_ADDR */ \
        REG(0x110),             /* [0x10] BB_STATE */ \
        REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
        REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
        REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
        REG(0x180),             /* [0x18] CCID */ \
        REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
        REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
        REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
        \
        NOP(1),                 /* [0x20] */ \
        LRI(9, POSTED),         /* [0x21] */ \
        REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
        REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
        REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
        REG16(0x284),           /* [0x28] dummy reg */ \
        REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
        REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
        REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
        REG16(0x274),           /* [0x30] PTBP_UDW */ \
        REG16(0x270)            /* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
        XE2_CTX_COMMON,

        NOP(2),                 /* [0x34] */
        LRI(2, POSTED),         /* [0x36] */
        REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
        REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

        NOP(6),                 /* [0x41] */
        LRI(1, 0),              /* [0x47] */
        REG(0x0c8),             /* [0x48] R_PWR_CLK_STATE */

        END
};

static const u8 xe2_bcs_offsets[] = {
        XE2_CTX_COMMON,

        NOP(4 + 8 + 1),         /* [0x34] */
        LRI(2, POSTED),         /* [0x41] */
        REG16(0x200),           /* [0x42] BCS_SWCTRL */
        REG16(0x204),           /* [0x44] BLIT_CCTL */

        END
};

static const u8 xe2_xcs_offsets[] = {
        XE2_CTX_COMMON,

        END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
        if (class == XE_ENGINE_CLASS_RENDER) {
                if (GRAPHICS_VER(xe) >= 20)
                        return xe2_rcs_offsets;
                else if (GRAPHICS_VERx100(xe) >= 1270)
                        return mtl_rcs_offsets;
                else if (GRAPHICS_VERx100(xe) >= 1255)
                        return dg2_rcs_offsets;
                else if (GRAPHICS_VERx100(xe) >= 1250)
                        return xehp_rcs_offsets;
                else
                        return gen12_rcs_offsets;
        } else if (class == XE_ENGINE_CLASS_COPY) {
                if (GRAPHICS_VER(xe) >= 20)
                        return xe2_bcs_offsets;
                else
                        return gen12_xcs_offsets;
        } else {
                if (GRAPHICS_VER(xe) >= 20)
                        return xe2_xcs_offsets;
                else if (GRAPHICS_VERx100(xe) >= 1255)
                        return dg2_xcs_offsets;
                else
                        return gen12_xcs_offsets;
        }
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
        regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
                                    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
                                    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;

        /* TODO: Timestamp */
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
        struct xe_device *xe = gt_to_xe(hwe->gt);

        if (GRAPHICS_VERx100(xe) >= 1250)
                return 0x70;
        else
                return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
        int x;

        x = lrc_ring_mi_mode(hwe);
        regs[x + 1] &= ~STOP_RING;
        regs[x + 1] |= STOP_RING << 16;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
        return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
        return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

#define LRC_SEQNO_PPHWSP_OFFSET         512
#define LRC_START_SEQNO_PPHWSP_OFFSET   (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET      2048
#define LRC_PPHWSP_SIZE                 SZ_4K
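
/*
 * Rough sketch of the LRC BO layout implied by the offset helpers below,
 * added for clarity (the PPHWSP offsets are the driver-defined values above,
 * not a hardware-mandated layout):
 *
 *   0 .......................... ring buffer (lrc->ring.size bytes)
 *   xe_lrc_pphwsp_offset() ...... PPHWSP (LRC_PPHWSP_SIZE)
 *       +512   seqno
 *       +520   start seqno
 *       +2048  parallel scratch
 *   __xe_lrc_regs_offset() ...... context register state
 */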

static size_t lrc_reg_size(struct xe_device *xe)
{
        if (GRAPHICS_VERx100(xe) >= 1250)
                return 96 * sizeof(u32);
        else
                return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
        return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
        /* The seqno is stored in the driver-defined portion of PPHWSP */
        return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
        /* The start seqno is stored in the driver-defined portion of PPHWSP */
        return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
        /* The parallel is stored in the driver-defined portion of PPHWSP */
        return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
        return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
        struct iosys_map map = lrc->bo->vmap; \
\
        xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
        iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
        return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
        return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
        return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
        struct xe_device *xe = lrc_to_xe(lrc);
        struct iosys_map map;

        map = __xe_lrc_regs_map(lrc);
        iosys_map_incr(&map, reg_nr * sizeof(u32));
        return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
        struct xe_device *xe = lrc_to_xe(lrc);
        struct iosys_map map;

        map = __xe_lrc_regs_map(lrc);
        iosys_map_incr(&map, reg_nr * sizeof(u32));
        xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
        struct xe_device *xe = gt_to_xe(hwe->gt);
        void *data;
        u32 *regs;

        data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
        if (!data)
                return NULL;

        /* 1st page: Per-Process of HW status Page */
        regs = data + LRC_PPHWSP_SIZE;
        set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
        set_context_control(regs, hwe);
        reset_stop_ring(regs, hwe);

        return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
        u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

        xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
        xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

#define PVC_CTX_ASID            (0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD   (0x2a + 1)
#define ACC_GRANULARITY_S       20
#define ACC_NOTIFY_S            16

int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
                struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
        struct xe_gt *gt = hwe->gt;
        struct xe_tile *tile = gt_to_tile(gt);
        struct xe_device *xe = gt_to_xe(gt);
        struct iosys_map map;
        void *init_data = NULL;
        u32 arb_enable;
        int err;

        lrc->flags = 0;

        /*
         * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
         * via VM bind calls.
         */
        lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
                                       ring_size + xe_lrc_size(xe, hwe->class),
                                       ttm_bo_type_kernel,
                                       XE_BO_CREATE_VRAM_IF_DGFX(tile) |
                                       XE_BO_CREATE_GGTT_BIT);
        if (IS_ERR(lrc->bo))
                return PTR_ERR(lrc->bo);

        lrc->tile = gt_to_tile(hwe->gt);
        lrc->ring.size = ring_size;
        lrc->ring.tail = 0;

        xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
                             hwe->fence_irq, hwe->name);

        if (!gt->default_lrc[hwe->class]) {
                init_data = empty_lrc_data(hwe);
                if (!init_data) {
                        err = -ENOMEM;
                        goto err_lrc_finish;
                }
        }

        /*
         * Init Per-Process of HW status Page, LRC / context state to known
         * values
         */
        map = __xe_lrc_pphwsp_map(lrc);
        if (!init_data) {
                xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
                xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
                                 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
                                 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
        } else {
                xe_map_memcpy_to(xe, &map, 0, init_data,
                                 xe_lrc_size(xe, hwe->class));
                kfree(init_data);
        }

        if (vm) {
                xe_lrc_set_ppgtt(lrc, vm);

                if (vm->xef)
                        xe_drm_client_add_bo(vm->xef->client, lrc->bo);
        }

        xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
        xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
        xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
        xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
                             RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
        if (xe->info.has_asid && vm)
                xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
                                     (q->usm.acc_granularity <<
                                      ACC_GRANULARITY_S) | vm->usm.asid);
        if (xe->info.supports_usm && vm)
                xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
                                     (q->usm.acc_notify << ACC_NOTIFY_S) |
                                     q->usm.acc_trigger);
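
        /*
         * Assemble the context descriptor. Summary added for clarity, per the
         * defines at the top of this file: bit 0 = valid, bits 4:3 =
         * addressing mode (LEGACY_64B_CONTEXT), bit 8 = privilege (PPGTT
         * mode), and, when GRAPHICS_VERx100 < 1250, engine instance and class
         * at bit positions 48 and 61. The PPHWSP GGTT address is OR'd in
         * later by xe_lrc_descriptor().
         */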
        lrc->desc = CTX_VALID;
        lrc->desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT;
        /* TODO: Priority */

        /* While this appears to have something about privileged batches or
         * some such, it really just means PPGTT mode.
         */
        if (vm)
                lrc->desc |= CTX_PRIVILEGE;

        if (GRAPHICS_VERx100(xe) < 1250) {
                lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
                lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
        }

        arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

        map = __xe_lrc_seqno_map(lrc);
        xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

        map = __xe_lrc_start_seqno_map(lrc);
        xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

        return 0;

err_lrc_finish:
        xe_lrc_finish(lrc);
        return err;
}

void xe_lrc_finish(struct xe_lrc *lrc)
{
        xe_hw_fence_ctx_finish(&lrc->fence_ctx);
        xe_bo_lock(lrc->bo, false);
        xe_bo_unpin(lrc->bo);
        xe_bo_unlock(lrc->bo);
        xe_bo_put(lrc->bo);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
        xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
        return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
        const u32 head = xe_lrc_ring_head(lrc);
        const u32 tail = lrc->ring.tail;
        const u32 size = lrc->ring.size;

        return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
                                const void *data, size_t size)
{
        struct xe_device *xe = lrc_to_xe(lrc);

        iosys_map_incr(&ring, lrc->ring.tail);
        xe_map_memcpy_to(xe, &ring, 0, data, size);
        lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
        struct xe_device *xe = lrc_to_xe(lrc);
        struct iosys_map ring;
        u32 rhs;
        size_t aligned_size;

        xe_assert(xe, IS_ALIGNED(size, 4));
        aligned_size = ALIGN(size, 8);

        ring = __xe_lrc_ring_map(lrc);

        xe_assert(xe, lrc->ring.tail < lrc->ring.size);
        rhs = lrc->ring.size - lrc->ring.tail;
        if (size > rhs) {
                __xe_lrc_write_ring(lrc, ring, data, rhs);
                __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
        } else {
                __xe_lrc_write_ring(lrc, ring, data, size);
        }

        if (aligned_size > size) {
                u32 noop = MI_NOOP;

                __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
        }
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
        return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
        return __xe_lrc_seqno_ggtt_addr(lrc);
}

struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
{
        return &xe_hw_fence_create(&lrc->fence_ctx,
                                   __xe_lrc_seqno_map(lrc))->dma;
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
        struct iosys_map map = __xe_lrc_seqno_map(lrc);

        return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
        struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

        return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
        return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
        return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
        return __xe_lrc_parallel_map(lrc);
}
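
/*
 * Illustration, added for clarity: most MI/GFXPIPE instructions carry
 * "total dwords - 2" in bits 7:0 of their header. For example, an entry
 * emitted by xe_lrc_emit_hwe_state_instructions() below with .num_dw = 4
 * has (4 - 2) = 2 written into that field, and instr_dw() recovers
 * 2 + 2 = 4 when parsing it back. Single-dword GFXPIPE opcodes and
 * 3DSTATE_SO_DECL_LIST (9-bit length) are handled as special cases.
 */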
static int instr_dw(u32 cmd_header)
{
        /* GFXPIPE "SINGLE_DW" opcodes are a single dword */
        if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
            GFXPIPE_SINGLE_DW_CMD(0, 0))
                return 1;

        /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
        if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
                return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

        /* Most instructions have the # of dwords (minus 2) in 7:0 */
        return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
                           struct xe_gt *gt,
                           u32 *dw,
                           int remaining_dw)
{
        u32 inst_header = *dw;
        u32 numdw = instr_dw(inst_header);
        u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
        int num_noop;

        /* First check for commands that don't have/use a '# DW' field */
        switch (inst_header & MI_OPCODE) {
        case MI_NOOP:
                num_noop = 1;
                while (num_noop < remaining_dw &&
                       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
                        num_noop++;
                drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
                return num_noop;

        case MI_TOPOLOGY_FILTER:
                drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
                return 1;

        case MI_BATCH_BUFFER_END:
                drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
                /* Return 'remaining_dw' to consume the rest of the LRC */
                return remaining_dw;
        }

        /*
         * Any remaining commands include a # of dwords.  We should make sure
         * it doesn't exceed the remaining size of the LRC.
         */
        if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
                numdw = remaining_dw;

        switch (inst_header & MI_OPCODE) {
        case MI_LOAD_REGISTER_IMM:
                drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
                           inst_header, (numdw - 1) / 2);
                for (int i = 1; i < numdw; i += 2)
                        drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
                return numdw;

        case MI_FORCE_WAKEUP:
                drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
                return numdw;

        default:
                drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
                           inst_header, opcode, numdw);
                return numdw;
        }
}

static int dump_gfxpipe_command(struct drm_printer *p,
                                struct xe_gt *gt,
                                u32 *dw,
                                int remaining_dw)
{
        u32 numdw = instr_dw(*dw);
        u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
        u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
        u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

        /*
         * Make sure we haven't mis-parsed a number of dwords that exceeds the
         * remaining size of the LRC.
         */
        if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
                numdw = remaining_dw;

        switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
        case cmd: \
                drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
                return numdw
#define MATCH3D(cmd) \
        case CMD_##cmd: \
                drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
                return numdw

        MATCH(STATE_BASE_ADDRESS);
        MATCH(STATE_SIP);
        MATCH(GPGPU_CSR_BASE_ADDRESS);
        MATCH(STATE_COMPUTE_MODE);
        MATCH3D(3DSTATE_BTD);

        MATCH3D(3DSTATE_VF_STATISTICS);

        MATCH(PIPELINE_SELECT);

        MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
        MATCH3D(3DSTATE_CLEAR_PARAMS);
        MATCH3D(3DSTATE_DEPTH_BUFFER);
        MATCH3D(3DSTATE_STENCIL_BUFFER);
        MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
        MATCH3D(3DSTATE_VERTEX_BUFFERS);
        MATCH3D(3DSTATE_VERTEX_ELEMENTS);
        MATCH3D(3DSTATE_INDEX_BUFFER);
        MATCH3D(3DSTATE_VF);
        MATCH3D(3DSTATE_MULTISAMPLE);
        MATCH3D(3DSTATE_CC_STATE_POINTERS);
        MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
        MATCH3D(3DSTATE_VS);
        MATCH3D(3DSTATE_GS);
        MATCH3D(3DSTATE_CLIP);
        MATCH3D(3DSTATE_SF);
        MATCH3D(3DSTATE_WM);
        MATCH3D(3DSTATE_CONSTANT_VS);
        MATCH3D(3DSTATE_CONSTANT_GS);
        MATCH3D(3DSTATE_SAMPLE_MASK);
        MATCH3D(3DSTATE_CONSTANT_HS);
        MATCH3D(3DSTATE_CONSTANT_DS);
        MATCH3D(3DSTATE_HS);
        MATCH3D(3DSTATE_TE);
        MATCH3D(3DSTATE_DS);
        MATCH3D(3DSTATE_STREAMOUT);
        MATCH3D(3DSTATE_SBE);
        MATCH3D(3DSTATE_PS);
        MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
        MATCH3D(3DSTATE_CPS_POINTERS);
        MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
        MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
        MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
        MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
        MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
        MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
        MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
        MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
        MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
        MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
        MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
        MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
        MATCH3D(3DSTATE_VF_INSTANCING);
        MATCH3D(3DSTATE_VF_SGVS);
        MATCH3D(3DSTATE_VF_TOPOLOGY);
        MATCH3D(3DSTATE_WM_CHROMAKEY);
        MATCH3D(3DSTATE_PS_BLEND);
        MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
        MATCH3D(3DSTATE_PS_EXTRA);
        MATCH3D(3DSTATE_RASTER);
        MATCH3D(3DSTATE_SBE_SWIZ);
        MATCH3D(3DSTATE_WM_HZ_OP);
        MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
        MATCH3D(3DSTATE_VF_SGVS_2);
        MATCH3D(3DSTATE_VFG);
        MATCH3D(3DSTATE_URB_ALLOC_VS);
        MATCH3D(3DSTATE_URB_ALLOC_HS);
        MATCH3D(3DSTATE_URB_ALLOC_DS);
        MATCH3D(3DSTATE_URB_ALLOC_GS);
        MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
        MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
        MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
        MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
        MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
        MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
        MATCH3D(3DSTATE_AMFS);
        MATCH3D(3DSTATE_DEPTH_BOUNDS);
        MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
        MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
        MATCH3D(3DSTATE_MESH_CONTROL);
        MATCH3D(3DSTATE_MESH_DISTRIB);
        MATCH3D(3DSTATE_TASK_REDISTRIB);
        MATCH3D(3DSTATE_MESH_SHADER);
        MATCH3D(3DSTATE_MESH_SHADER_DATA);
        MATCH3D(3DSTATE_TASK_CONTROL);
        MATCH3D(3DSTATE_TASK_SHADER);
        MATCH3D(3DSTATE_TASK_SHADER_DATA);
        MATCH3D(3DSTATE_URB_ALLOC_MESH);
        MATCH3D(3DSTATE_URB_ALLOC_TASK);
        MATCH3D(3DSTATE_CLIP_MESH);
        MATCH3D(3DSTATE_SBE_MESH);
        MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

        MATCH3D(3DSTATE_DRAWING_RECTANGLE);
        MATCH3D(3DSTATE_CHROMA_KEY);
        MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
        MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
        MATCH3D(3DSTATE_LINE_STIPPLE);
        MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
        MATCH3D(3DSTATE_MONOFILTER_SIZE);
        MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
        MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
        MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
        MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
        MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
        MATCH3D(3DSTATE_SO_DECL_LIST);
        MATCH3D(3DSTATE_SO_BUFFER);
        MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
        MATCH3D(3DSTATE_SAMPLE_PATTERN);
        MATCH3D(3DSTATE_3D_MODE);
        MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
        MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
        MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

        default:
                drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
                           *dw, pipeline, opcode, subopcode, numdw);
                return numdw;
        }
}

void xe_lrc_dump_default(struct drm_printer *p,
                         struct xe_gt *gt,
                         enum xe_engine_class hwe_class)
{
        u32 *dw;
        int remaining_dw, num_dw;

        if (!gt->default_lrc[hwe_class]) {
                drm_printf(p, "No default LRC for class %d\n", hwe_class);
                return;
        }

        /*
         * Skip the beginning of the LRC since it contains the per-process
         * hardware status page.
         */
        dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
        remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;

        while (remaining_dw > 0) {
                if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
                        num_dw = dump_mi_command(p, gt, dw, remaining_dw);
                } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
                        num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
                } else {
                        num_dw = min(instr_dw(*dw), remaining_dw);
                        drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
                                   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
                                   num_dw);
                }

                dw += num_dw;
                remaining_dw -= num_dw;
        }
}

struct instr_state {
        u32 instr;
        u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
        { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
        { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
        { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
        { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
        { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
        { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
        { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
        { .instr = CMD_3DSTATE_VS, .num_dw = 9 },
        { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
        { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
        { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
        { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
        { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
        { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
        { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
        { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
        { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
        { .instr = CMD_3DSTATE_SF, .num_dw = 4 },
        { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
        { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
        { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
        { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
        { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
        { .instr = CMD_3DSTATE_HS, .num_dw = 9 },
        { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
        { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
        { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
        { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
        { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
        { .instr = CMD_3DSTATE_TE, .num_dw = 5 },
        { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
        { .instr = CMD_3DSTATE_DS, .num_dw = 11 },
        { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
        { .instr = CMD_3DSTATE_GS, .num_dw = 10 },
        { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
        { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
        { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
        { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
        { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
        { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
        { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
        struct xe_gt *gt = q->hwe->gt;
        struct xe_device *xe = gt_to_xe(gt);
        const struct instr_state *state_table = NULL;
        int state_table_size = 0;

        /*
         * At the moment we only need to emit non-register state for the RCS
         * engine.
         */
        if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
                return;

        switch (GRAPHICS_VERx100(xe)) {
        case 1255:
        case 1270 ... 2004:
                state_table = xe_hpg_svg_state;
                state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
                break;
        default:
                xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
                          GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
                return;
        }

        for (int i = 0; i < state_table_size; i++) {
                u32 instr = state_table[i].instr;
                u16 num_dw = state_table[i].num_dw;
                bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

                xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
                xe_gt_assert(gt, num_dw != 0);
                xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

                /*
                 * Xe2's SVG context is the same as the one on DG2 / MTL
                 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
                 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
                 * Just make the replacement here rather than defining a
                 * whole separate table for the single trivial change.
                 */
                if (GRAPHICS_VER(xe) >= 20 &&
                    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
                        instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

                bb->cs[bb->len] = instr;
                if (!is_single_dw)
                        bb->cs[bb->len] |= (num_dw - 2);

                bb->len += num_dw;
        }
}