1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_lrc.h" 7 8 #include "instructions/xe_mi_commands.h" 9 #include "instructions/xe_gfxpipe_commands.h" 10 #include "regs/xe_engine_regs.h" 11 #include "regs/xe_gpu_commands.h" 12 #include "regs/xe_lrc_layout.h" 13 #include "xe_bb.h" 14 #include "xe_bo.h" 15 #include "xe_device.h" 16 #include "xe_drm_client.h" 17 #include "xe_exec_queue_types.h" 18 #include "xe_gt.h" 19 #include "xe_gt_printk.h" 20 #include "xe_hw_fence.h" 21 #include "xe_map.h" 22 #include "xe_vm.h" 23 24 #define CTX_VALID (1 << 0) 25 #define CTX_PRIVILEGE (1 << 8) 26 #define CTX_ADDRESSING_MODE_SHIFT 3 27 #define LEGACY_64B_CONTEXT 3 28 29 #define ENGINE_CLASS_SHIFT 61 30 #define ENGINE_INSTANCE_SHIFT 48 31 32 static struct xe_device * 33 lrc_to_xe(struct xe_lrc *lrc) 34 { 35 return gt_to_xe(lrc->fence_ctx.gt); 36 } 37 38 size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) 39 { 40 switch (class) { 41 case XE_ENGINE_CLASS_RENDER: 42 if (GRAPHICS_VER(xe) >= 20) 43 return 4 * SZ_4K; 44 else 45 return 14 * SZ_4K; 46 case XE_ENGINE_CLASS_COMPUTE: 47 /* 14 pages since graphics_ver == 11 */ 48 if (GRAPHICS_VER(xe) >= 20) 49 return 3 * SZ_4K; 50 else 51 return 14 * SZ_4K; 52 default: 53 WARN(1, "Unknown engine class: %d", class); 54 fallthrough; 55 case XE_ENGINE_CLASS_COPY: 56 case XE_ENGINE_CLASS_VIDEO_DECODE: 57 case XE_ENGINE_CLASS_VIDEO_ENHANCE: 58 case XE_ENGINE_CLASS_OTHER: 59 return 2 * SZ_4K; 60 } 61 } 62 63 /* 64 * The per-platform tables are u8-encoded in @data. Decode @data and set the 65 * addresses' offset and commands in @regs. The following encoding is used 66 * for each byte. There are 2 steps: decoding commands and decoding addresses. 
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits (the decoder
 *      below takes the skip count from bits [6:0])
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 7 bits, use additional bytes, to
 *      follow, for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	/* Every decoded register offset is relative to this engine's MMIO base */
	const u32 base = hwe->mmio_base;

	/* A zero byte (END) terminates the table */
	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			/* NOP entry: leave 'count' dwords of @regs untouched */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		/* LRI entry: low 6 bits are the register count, the rest flags */
		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		/* The do/while below would wrap around on a zero count */
		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			/*
			 * Accumulate 7 bits per byte, most significant group
			 * first; bit 7 set means another byte follows.
			 */
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/*
			 * Offsets are stored in dword units. Only the address
			 * slot is written; the value slot after it is skipped.
			 */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	/* Close the register list with a batch-buffer-end dword */
	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

/* Encoded register layout consumed by set_offsets(); see reg_offsets() for users */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

/* Encoded register layout consumed by set_offsets(); see reg_offsets() for users */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

/* Encoded register layout consumed by set_offsets(); see reg_offsets() for users */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

/* Encoded register layout consumed by set_offsets(); see reg_offsets() for users */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

/* Encoded register layout consumed by set_offsets(); see reg_offsets() for users */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

/* Encoded register layout consumed by set_offsets(); see reg_offsets() for users */
static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

/*
 * Leading entries shared by the xe2_*_offsets tables. The bracketed number in
 * each trailing comment is the dword index the entry lands on once decoded by
 * set_offsets().
 */
#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */

/* Encoded register layout consumed by set_offsets(); see reg_offsets() for users */
static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x3b] */
	LRI(1, 0),              /* [0x41] */
	REG(0x0c8),             /* [0x42] R_PWR_CLK_STATE */

	END
};

/* Encoded register layout consumed by set_offsets(); see reg_offsets() for users */
static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	END
};

/* Encoded register layout consumed by set_offsets(); see reg_offsets() for users */
static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	END
};

/* The table-encoding helpers are only meant for the tables above */
#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

/*
 * Pick the encoded register table for an engine of @class on @xe.
 *
 * Render engines choose by graphics version, newest first. Copy engines have
 * a dedicated table only from graphics version 20 on and otherwise use the
 * gen12 xcs table. Every other class uses one of the xcs tables.
 */
static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

/*
 * Write the initial value of the CTX_CONTEXT_CONTROL slot in @regs.
 * @hwe is currently unused.
 */
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
				    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;

	/* TODO: Timestamp */
}

/*
 * Index into the context register array used by reset_stop_ring(); it differs
 * between platforms at or above graphics version 12.50 and older ones.
 * NOTE(review): presumably the dword index of the RING_MI_MODE LRI entry -
 * confirm against the hardware layout.
 */
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

/*
 * Clear STOP_RING in the value dword that follows the lrc_ring_mi_mode()
 * index, and set the same bit shifted up by 16.
 * NOTE(review): the upper half looks like the write-enable mask of a masked
 * register - confirm.
 */
static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

/* The ring sits at the very start of the LRC buffer object */
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

/* The PPHWSP immediately follows the ring in the LRC buffer object */
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

/* Byte offsets of driver-owned fields inside the PPHWSP, and the PPHWSP size */
#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K

/* Size in bytes counted as the register portion of the context: 96 or 80 dwords */
static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

/* Combined size of the PPHWSP and the register portion that follows it */
size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

/* The context registers start right after the PPHWSP page */
static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

/*
 * For each element name, generate two helpers built on the matching
 * __xe_lrc_<elem>_offset():
 *   __xe_lrc_<elem>_map()       - CPU mapping of the BO advanced to the element
 *   __xe_lrc_<elem>_ggtt_addr() - GGTT address of the BO plus the element offset
 */
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS

/* The GGTT address of an LRC is the address of its PPHWSP */
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

/* Read dword number @reg_nr of the context register area */
u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

/* Write @val to dword number @reg_nr of the context register area */
void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

/*
 * Build a zeroed context image of xe_lrc_size() bytes for @hwe, with only the
 * register commands/offsets, context control and stop-ring state filled in.
 *
 * Returns a kzalloc()ed buffer the caller must kfree(), or NULL if the
 * allocation fails.
 */
static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	void *data;
	u32 *regs;

	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process of HW status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
	set_context_control(regs, hwe);
	reset_stop_ring(regs, hwe);

	return data;
}

/* Store the 64-bit PDP4 descriptor of @vm in the PDP0 upper/lower context slots */
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

/* Context register slots and field shifts used by xe_lrc_init() below */
#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
#define ACC_GRANULARITY_S       20
#define ACC_NOTIFY_S            16

/*
 * Initialise @lrc for engine @hwe with a ring of @ring_size bytes.
 *
 * Creates and pins one buffer object holding the ring followed by the context
 * image, seeds the image either from the GT's default LRC for the engine class
 * or, when none exists yet, from a freshly built empty image, then programs
 * the ring registers, the optional PPGTT/ASID/access-counter state and the
 * context descriptor.
 *
 * @vm may be NULL, in which case no PPGTT state is written. @q is
 * dereferenced only on the paths that also require @vm.
 *
 * Returns 0 on success or a negative error code.
 */
int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
				      ring_size + xe_lrc_size(xe, hwe->class),
				      ttm_bo_type_kernel,
				      XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				      XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	/* Must precede any lrc_to_xe() use: that helper goes through fence_ctx.gt */
	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	/* No default image recorded for this class yet: build an empty one */
	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
				     (q->usm.acc_granularity <<
				      ACC_GRANULARITY_S) | vm->usm.asid);
	if (xe->info.has_usm && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
				     (q->usm.acc_notify << ACC_NOTIFY_S) |
				     q->usm.acc_trigger);

	lrc->desc = CTX_VALID;
	lrc->desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT;
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= CTX_PRIVILEGE;

	/* Only platforms below 12.50 carry engine instance/class in the descriptor */
	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
	}

	/* First command in the ring: turn arbitration on */
	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	/* Seed both seqno slots with the value just before the next seqno */
	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/* Tear down what xe_lrc_init() set up: the fence context and the pinned BO */
void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

/* Store @head in the CTX_RING_HEAD slot of the context image */
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

/* Read the CTX_RING_HEAD slot and keep only the HEAD_ADDR bits */
u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

/*
 * Bytes that can still be written to the ring, from the saved head and the
 * software tail. Yields the full ring size when head == tail.
 * NOTE(review): the "& (size - 1)" wrap assumes the ring size is a power of
 * two - confirm with callers of xe_lrc_init().
 */
u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

static
void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, 825 const void *data, size_t size) 826 { 827 struct xe_device *xe = lrc_to_xe(lrc); 828 829 iosys_map_incr(&ring, lrc->ring.tail); 830 xe_map_memcpy_to(xe, &ring, 0, data, size); 831 lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); 832 } 833 834 void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) 835 { 836 struct xe_device *xe = lrc_to_xe(lrc); 837 struct iosys_map ring; 838 u32 rhs; 839 size_t aligned_size; 840 841 xe_assert(xe, IS_ALIGNED(size, 4)); 842 aligned_size = ALIGN(size, 8); 843 844 ring = __xe_lrc_ring_map(lrc); 845 846 xe_assert(xe, lrc->ring.tail < lrc->ring.size); 847 rhs = lrc->ring.size - lrc->ring.tail; 848 if (size > rhs) { 849 __xe_lrc_write_ring(lrc, ring, data, rhs); 850 __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); 851 } else { 852 __xe_lrc_write_ring(lrc, ring, data, size); 853 } 854 855 if (aligned_size > size) { 856 u32 noop = MI_NOOP; 857 858 __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); 859 } 860 } 861 862 u64 xe_lrc_descriptor(struct xe_lrc *lrc) 863 { 864 return lrc->desc | xe_lrc_ggtt_addr(lrc); 865 } 866 867 u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) 868 { 869 return __xe_lrc_seqno_ggtt_addr(lrc); 870 } 871 872 struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc) 873 { 874 return &xe_hw_fence_create(&lrc->fence_ctx, 875 __xe_lrc_seqno_map(lrc))->dma; 876 } 877 878 s32 xe_lrc_seqno(struct xe_lrc *lrc) 879 { 880 struct iosys_map map = __xe_lrc_seqno_map(lrc); 881 882 return xe_map_read32(lrc_to_xe(lrc), &map); 883 } 884 885 s32 xe_lrc_start_seqno(struct xe_lrc *lrc) 886 { 887 struct iosys_map map = __xe_lrc_start_seqno_map(lrc); 888 889 return xe_map_read32(lrc_to_xe(lrc), &map); 890 } 891 892 u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) 893 { 894 return __xe_lrc_start_seqno_ggtt_addr(lrc); 895 } 896 897 u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) 898 { 899 return 
__xe_lrc_parallel_ggtt_addr(lrc); 900 } 901 902 struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) 903 { 904 return __xe_lrc_parallel_map(lrc); 905 } 906 907 static int instr_dw(u32 cmd_header) 908 { 909 /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ 910 if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) == 911 GFXPIPE_SINGLE_DW_CMD(0, 0)) 912 return 1; 913 914 /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */ 915 if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST) 916 return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2; 917 918 /* Most instructions have the # of dwords (minus 2) in 7:0 */ 919 return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; 920 } 921 922 static int dump_mi_command(struct drm_printer *p, 923 struct xe_gt *gt, 924 u32 *dw, 925 int remaining_dw) 926 { 927 u32 inst_header = *dw; 928 u32 numdw = instr_dw(inst_header); 929 u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header); 930 int num_noop; 931 932 /* First check for commands that don't have/use a '# DW' field */ 933 switch (inst_header & MI_OPCODE) { 934 case MI_NOOP: 935 num_noop = 1; 936 while (num_noop < remaining_dw && 937 (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) 938 num_noop++; 939 drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop); 940 return num_noop; 941 942 case MI_TOPOLOGY_FILTER: 943 drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header); 944 return 1; 945 946 case MI_BATCH_BUFFER_END: 947 drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header); 948 /* Return 'remaining_dw' to consume the rest of the LRC */ 949 return remaining_dw; 950 } 951 952 /* 953 * Any remaining commands include a # of dwords. We should make sure 954 * it doesn't exceed the remaining size of the LRC. 
955 */ 956 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 957 numdw = remaining_dw; 958 959 switch (inst_header & MI_OPCODE) { 960 case MI_LOAD_REGISTER_IMM: 961 drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", 962 inst_header, (numdw - 1) / 2); 963 for (int i = 1; i < numdw; i += 2) 964 drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); 965 return numdw; 966 967 case MI_FORCE_WAKEUP: 968 drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); 969 return numdw; 970 971 default: 972 drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", 973 inst_header, opcode, numdw); 974 return numdw; 975 } 976 } 977 978 static int dump_gfxpipe_command(struct drm_printer *p, 979 struct xe_gt *gt, 980 u32 *dw, 981 int remaining_dw) 982 { 983 u32 numdw = instr_dw(*dw); 984 u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); 985 u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); 986 u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); 987 988 /* 989 * Make sure we haven't mis-parsed a number of dwords that exceeds the 990 * remaining size of the LRC. 
991 */ 992 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 993 numdw = remaining_dw; 994 995 switch (*dw & GFXPIPE_MATCH_MASK) { 996 #define MATCH(cmd) \ 997 case cmd: \ 998 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 999 return numdw 1000 #define MATCH3D(cmd) \ 1001 case CMD_##cmd: \ 1002 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 1003 return numdw 1004 1005 MATCH(STATE_BASE_ADDRESS); 1006 MATCH(STATE_SIP); 1007 MATCH(GPGPU_CSR_BASE_ADDRESS); 1008 MATCH(STATE_COMPUTE_MODE); 1009 MATCH3D(3DSTATE_BTD); 1010 1011 MATCH3D(3DSTATE_VF_STATISTICS); 1012 1013 MATCH(PIPELINE_SELECT); 1014 1015 MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); 1016 MATCH3D(3DSTATE_CLEAR_PARAMS); 1017 MATCH3D(3DSTATE_DEPTH_BUFFER); 1018 MATCH3D(3DSTATE_STENCIL_BUFFER); 1019 MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); 1020 MATCH3D(3DSTATE_VERTEX_BUFFERS); 1021 MATCH3D(3DSTATE_VERTEX_ELEMENTS); 1022 MATCH3D(3DSTATE_INDEX_BUFFER); 1023 MATCH3D(3DSTATE_VF); 1024 MATCH3D(3DSTATE_MULTISAMPLE); 1025 MATCH3D(3DSTATE_CC_STATE_POINTERS); 1026 MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); 1027 MATCH3D(3DSTATE_VS); 1028 MATCH3D(3DSTATE_GS); 1029 MATCH3D(3DSTATE_CLIP); 1030 MATCH3D(3DSTATE_SF); 1031 MATCH3D(3DSTATE_WM); 1032 MATCH3D(3DSTATE_CONSTANT_VS); 1033 MATCH3D(3DSTATE_CONSTANT_GS); 1034 MATCH3D(3DSTATE_SAMPLE_MASK); 1035 MATCH3D(3DSTATE_CONSTANT_HS); 1036 MATCH3D(3DSTATE_CONSTANT_DS); 1037 MATCH3D(3DSTATE_HS); 1038 MATCH3D(3DSTATE_TE); 1039 MATCH3D(3DSTATE_DS); 1040 MATCH3D(3DSTATE_STREAMOUT); 1041 MATCH3D(3DSTATE_SBE); 1042 MATCH3D(3DSTATE_PS); 1043 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); 1044 MATCH3D(3DSTATE_CPS_POINTERS); 1045 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); 1046 MATCH3D(3DSTATE_BLEND_STATE_POINTERS); 1047 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); 1048 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); 1049 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); 1050 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); 1051 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); 1052 
MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); 1053 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); 1054 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); 1055 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); 1056 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); 1057 MATCH3D(3DSTATE_VF_INSTANCING); 1058 MATCH3D(3DSTATE_VF_SGVS); 1059 MATCH3D(3DSTATE_VF_TOPOLOGY); 1060 MATCH3D(3DSTATE_WM_CHROMAKEY); 1061 MATCH3D(3DSTATE_PS_BLEND); 1062 MATCH3D(3DSTATE_WM_DEPTH_STENCIL); 1063 MATCH3D(3DSTATE_PS_EXTRA); 1064 MATCH3D(3DSTATE_RASTER); 1065 MATCH3D(3DSTATE_SBE_SWIZ); 1066 MATCH3D(3DSTATE_WM_HZ_OP); 1067 MATCH3D(3DSTATE_VF_COMPONENT_PACKING); 1068 MATCH3D(3DSTATE_VF_SGVS_2); 1069 MATCH3D(3DSTATE_VFG); 1070 MATCH3D(3DSTATE_URB_ALLOC_VS); 1071 MATCH3D(3DSTATE_URB_ALLOC_HS); 1072 MATCH3D(3DSTATE_URB_ALLOC_DS); 1073 MATCH3D(3DSTATE_URB_ALLOC_GS); 1074 MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); 1075 MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); 1076 MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); 1077 MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); 1078 MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); 1079 MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); 1080 MATCH3D(3DSTATE_AMFS); 1081 MATCH3D(3DSTATE_DEPTH_BOUNDS); 1082 MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); 1083 MATCH3D(3DSTATE_CONSTANT_TS_POINTER); 1084 MATCH3D(3DSTATE_MESH_CONTROL); 1085 MATCH3D(3DSTATE_MESH_DISTRIB); 1086 MATCH3D(3DSTATE_TASK_REDISTRIB); 1087 MATCH3D(3DSTATE_MESH_SHADER); 1088 MATCH3D(3DSTATE_MESH_SHADER_DATA); 1089 MATCH3D(3DSTATE_TASK_CONTROL); 1090 MATCH3D(3DSTATE_TASK_SHADER); 1091 MATCH3D(3DSTATE_TASK_SHADER_DATA); 1092 MATCH3D(3DSTATE_URB_ALLOC_MESH); 1093 MATCH3D(3DSTATE_URB_ALLOC_TASK); 1094 MATCH3D(3DSTATE_CLIP_MESH); 1095 MATCH3D(3DSTATE_SBE_MESH); 1096 MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); 1097 1098 MATCH3D(3DSTATE_DRAWING_RECTANGLE); 1099 MATCH3D(3DSTATE_CHROMA_KEY); 1100 MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); 1101 MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); 1102 MATCH3D(3DSTATE_LINE_STIPPLE); 1103 MATCH3D(3DSTATE_AA_LINE_PARAMETERS); 1104 MATCH3D(3DSTATE_MONOFILTER_SIZE); 
1105 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); 1106 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); 1107 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); 1108 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); 1109 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); 1110 MATCH3D(3DSTATE_SO_DECL_LIST); 1111 MATCH3D(3DSTATE_SO_BUFFER); 1112 MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); 1113 MATCH3D(3DSTATE_SAMPLE_PATTERN); 1114 MATCH3D(3DSTATE_3D_MODE); 1115 MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); 1116 MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); 1117 MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); 1118 1119 default: 1120 drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", 1121 *dw, pipeline, opcode, subopcode, numdw); 1122 return numdw; 1123 } 1124 } 1125 1126 void xe_lrc_dump_default(struct drm_printer *p, 1127 struct xe_gt *gt, 1128 enum xe_engine_class hwe_class) 1129 { 1130 u32 *dw; 1131 int remaining_dw, num_dw; 1132 1133 if (!gt->default_lrc[hwe_class]) { 1134 drm_printf(p, "No default LRC for class %d\n", hwe_class); 1135 return; 1136 } 1137 1138 /* 1139 * Skip the beginning of the LRC since it contains the per-process 1140 * hardware status page. 
1141 */ 1142 dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; 1143 remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4; 1144 1145 while (remaining_dw > 0) { 1146 if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { 1147 num_dw = dump_mi_command(p, gt, dw, remaining_dw); 1148 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { 1149 num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); 1150 } else { 1151 num_dw = min(instr_dw(*dw), remaining_dw); 1152 drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", 1153 *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), 1154 num_dw); 1155 } 1156 1157 dw += num_dw; 1158 remaining_dw -= num_dw; 1159 } 1160 } 1161 1162 struct instr_state { 1163 u32 instr; 1164 u16 num_dw; 1165 }; 1166 1167 static const struct instr_state xe_hpg_svg_state[] = { 1168 { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, 1169 { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, 1170 { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, 1171 { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, 1172 { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, 1173 { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, 1174 { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, 1175 { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, 1176 { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, 1177 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, 1178 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, 1179 { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, 1180 { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, 1181 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, 1182 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, 1183 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, 1184 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, 1185 { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, 1186 { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, 1187 { .instr = CMD_3DSTATE_CLIP_MESH, 
.num_dw = 2 }, 1188 { .instr = CMD_3DSTATE_SF, .num_dw = 4 }, 1189 { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, 1190 { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, 1191 { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, 1192 { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, 1193 { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, 1194 { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, 1195 { .instr = CMD_3DSTATE_HS, .num_dw = 9 }, 1196 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, 1197 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, 1198 { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, 1199 { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, 1200 { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, 1201 { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, 1202 { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, 1203 { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, 1204 { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, 1205 { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, 1206 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, 1207 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, 1208 { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, 1209 { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, 1210 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, 1211 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 }, 1212 { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 }, 1213 { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 }, 1214 { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 }, 1215 { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 }, 1216 { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 }, 1217 { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, 1218 }; 1219 1220 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) 1221 { 1222 struct xe_gt *gt = q->hwe->gt; 1223 struct xe_device *xe = gt_to_xe(gt); 1224 const 
struct instr_state *state_table = NULL; 1225 int state_table_size = 0; 1226 1227 /* 1228 * At the moment we only need to emit non-register state for the RCS 1229 * engine. 1230 */ 1231 if (q->hwe->class != XE_ENGINE_CLASS_RENDER) 1232 return; 1233 1234 switch (GRAPHICS_VERx100(xe)) { 1235 case 1255: 1236 case 1270 ... 2004: 1237 state_table = xe_hpg_svg_state; 1238 state_table_size = ARRAY_SIZE(xe_hpg_svg_state); 1239 break; 1240 default: 1241 xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", 1242 GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); 1243 return; 1244 } 1245 1246 for (int i = 0; i < state_table_size; i++) { 1247 u32 instr = state_table[i].instr; 1248 u16 num_dw = state_table[i].num_dw; 1249 bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW); 1250 1251 xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE); 1252 xe_gt_assert(gt, num_dw != 0); 1253 xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); 1254 1255 /* 1256 * Xe2's SVG context is the same as the one on DG2 / MTL 1257 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has 1258 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined). 1259 * Just make the replacement here rather than defining a 1260 * whole separate table for the single trivial change. 1261 */ 1262 if (GRAPHICS_VER(xe) >= 20 && 1263 instr == CMD_3DSTATE_DRAWING_RECTANGLE) 1264 instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; 1265 1266 bb->cs[bb->len] = instr; 1267 if (!is_single_dw) 1268 bb->cs[bb->len] |= (num_dw - 2); 1269 1270 bb->len += num_dw; 1271 } 1272 } 1273