// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gpu_commands.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_vm.h"

#define LRC_VALID			(1 << 0)
#define LRC_PRIVILEGE			(1 << 8)
#define LRC_ADDRESSING_MODE_SHIFT	3
#define LRC_LEGACY_64B_CONTEXT		3

#define ENGINE_CLASS_SHIFT		61
#define ENGINE_INSTANCE_SHIFT		48

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			return 4 * SZ_4K;
		else
			return 14 * SZ_4K;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			return 3 * SZ_4K;
		else
			return 14 * SZ_4K;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		return 2 * SZ_4K;
	}
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating a MI_LOAD_REGISTER_IMM command, allow setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to set values for in the case
 *	  of MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for larger ones.
 * Those macros already set all the bits documented below correctly:
 *
 * [7]: when a register offset needs more than 6 bits, use additional bytes
 *	(which follow) for the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
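
/*
 * Illustrative example (not one of the real tables below): the fragment
 *
 *	LRI(2, POSTED),
 *	REG(0x034),
 *	REG16(0x2b4),
 *
 * encodes to the bytes 0x42, 0x0d, 0x81, 0x2d. set_offsets() decodes that
 * into a MI_LOAD_REGISTER_IMM header for two registers with
 * MI_LRI_FORCE_POSTED set, followed by the address slots (mmio_base + 0x34)
 * and (mmio_base + 0x2b4), leaving the value dword after each address
 * untouched.
 */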
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	END
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	END
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
				    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;

	/* TODO: Timestamp */
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel data is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}
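
/*
 * Rough layout of the LRC BO implied by the offset helpers above (a sketch,
 * not an exhaustive map; only the ring size is variable):
 *
 *   [0 .. ring.size)		ring buffer
 *   [ring.size .. +SZ_4K)	PPHWSP
 *	+512	seqno
 *	+520	start seqno
 *	+2048	parallel data
 *   [ring.size + SZ_4K ..)	context (register) state
 */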

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	void *data;
	u32 *regs;

	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: per-process HW status page (PPHWSP) */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
	set_context_control(regs, hwe);
	reset_stop_ring(regs, hwe);

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
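
/*
 * These are dword positions within the context register state built from the
 * tables above: the bracketed indices (e.g. [0x2e] CS_CTX_ASID and [0x2a]
 * CS_ACC_CTR_THOLD in XE2_CTX_COMMON) point at the register-offset dword of
 * an LRI pair, and the "+ 1" selects the value dword that follows it.
 */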

int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
				       ring_size + xe_lrc_size(xe, hwe->class),
				       ttm_bo_type_kernel,
				       XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				       XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init per-process HW status page (PPHWSP) and LRC / context state to
	 * known values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}
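
/*
 * The ring write helpers below append at ring.tail and wrap at ring.size
 * (the masking assumes a power-of-two ring size). Callers pass dword-aligned
 * data, e.g. the single MI_ARB_ON_OFF dword emitted by xe_lrc_init();
 * xe_lrc_write_ring() splits a write that crosses the end of the ring and
 * pads to a qword boundary with an MI_NOOP.
 */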

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
{
	return &xe_hw_fence_create(&lrc->fence_ctx,
				   __xe_lrc_seqno_map(lrc))->dma;
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}
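
/*
 * The helpers below decode the default LRC image for debug output: instr_dw()
 * returns the dword length of a single command, the dump_*() helpers
 * pretty-print one command and return how many dwords it consumed, and
 * xe_lrc_dump_default() walks the image command by command.
 */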
GFXPIPE "SINGLE_DW" opcodes are a single dword */ 902 if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) == 903 GFXPIPE_SINGLE_DW_CMD(0, 0)) 904 return 1; 905 906 /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */ 907 if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST) 908 return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2; 909 910 /* Most instructions have the # of dwords (minus 2) in 7:0 */ 911 return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; 912 } 913 914 static int dump_mi_command(struct drm_printer *p, 915 struct xe_gt *gt, 916 u32 *dw, 917 int remaining_dw) 918 { 919 u32 inst_header = *dw; 920 u32 numdw = instr_dw(inst_header); 921 u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header); 922 int num_noop; 923 924 /* First check for commands that don't have/use a '# DW' field */ 925 switch (inst_header & MI_OPCODE) { 926 case MI_NOOP: 927 num_noop = 1; 928 while (num_noop < remaining_dw && 929 (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) 930 num_noop++; 931 drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop); 932 return num_noop; 933 934 case MI_TOPOLOGY_FILTER: 935 drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header); 936 return 1; 937 938 case MI_BATCH_BUFFER_END: 939 drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header); 940 /* Return 'remaining_dw' to consume the rest of the LRC */ 941 return remaining_dw; 942 } 943 944 /* 945 * Any remaining commands include a # of dwords. We should make sure 946 * it doesn't exceed the remaining size of the LRC. 947 */ 948 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 949 numdw = remaining_dw; 950 951 switch (inst_header & MI_OPCODE) { 952 case MI_LOAD_REGISTER_IMM: 953 drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", 954 inst_header, (numdw - 1) / 2); 955 for (int i = 1; i < numdw; i += 2) 956 drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); 957 return numdw; 958 959 case MI_FORCE_WAKEUP: 960 drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); 961 return numdw; 962 963 default: 964 drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", 965 inst_header, opcode, numdw); 966 return numdw; 967 } 968 } 969 970 static int dump_gfxpipe_command(struct drm_printer *p, 971 struct xe_gt *gt, 972 u32 *dw, 973 int remaining_dw) 974 { 975 u32 numdw = instr_dw(*dw); 976 u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); 977 u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); 978 u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); 979 980 /* 981 * Make sure we haven't mis-parsed a number of dwords that exceeds the 982 * remaining size of the LRC. 
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};
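
/*
 * Note that xe_lrc_emit_hwe_state_instructions() below only writes the header
 * dword of each instruction; bb->len is advanced past the remaining
 * num_dw - 1 dwords, which are left as whatever the batch buffer already
 * contains.
 */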

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * At the moment we only need to emit non-register state for the RCS
	 * engine.
	 */
	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	switch (GRAPHICS_VERx100(xe)) {
	case 1255:
	case 1270 ... 2004:
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
		break;
	default:
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}