// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
#include "xe_wa.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_PPHWSP_SIZE				SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	/* Per-process HW status page (PPHWSP) */
	size = LRC_PPHWSP_SIZE;

	/* Engine context image */
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size += 3 * SZ_4K;
		else
			size += 13 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		if (GRAPHICS_VER(xe) >= 20)
			size += 2 * SZ_4K;
		else
			size += 13 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size += 1 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, additional bytes follow
 *      for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
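 *
 * As a worked example of the address decoding (illustrative only, not tied
 * to any particular table): REG16(0x2b4) encodes as the two bytes 0x81, 0x2d.
 * The decoder reads 0x01 from the first byte (bit [7] set, so another byte
 * follows), shifts it left by 7 and ORs in 0x2d, giving offset 0xad; the
 * register written is then base + (0xad << 2) = base + 0x2b4.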
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(5, POSTED),		/* [0x01] */
	REG(0x034),		/* [0x02] RING_BUFFER_HEAD */
	REG(0x030),		/* [0x04] RING_BUFFER_TAIL */
	REG(0x038),		/* [0x06] RING_BUFFER_START */
	REG(0x048),		/* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),			/* [0x0c] */
	LRI(9, POSTED),		/* [0x11] */
	REG(0x168),		/* [0x12] BB_ADDR_UDW */
	REG(0x140),		/* [0x14] BB_ADDR */
	REG(0x110),		/* [0x16] BB_STATE */
	REG16(0x588),		/* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x22] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x24] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x26] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x28] BB_STACK_WRITE_PORT */

	NOP(12),		/* [0x00] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);
	u8 num_regs;

	if (!xe_device_uses_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	num_regs = xe_device_has_msix(xe) ? 3 : 2;
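
	/*
	 * The MI_LOAD_REGISTER_IMM emitted below covers num_regs registers:
	 * the interrupt status and source report pointers, plus CS_INT_VEC
	 * when MSI-X is in use (set up in the xe_device_has_msix() block at
	 * the end of this function), hence 3 registers instead of 2.
	 */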
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);

	if (xe_device_has_msix(xe)) {
		regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
		/* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
	}
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048

u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* The job timestamp is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel scratch area is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}
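
/*
 * DECL_MAP_ADDR_HELPERS() pairs each __xe_lrc_<elem>_offset() helper above
 * with two generated accessors. As a rough sketch of the expansion,
 * DECL_MAP_ADDR_HELPERS(seqno) produces:
 *
 *	static inline struct iosys_map __xe_lrc_seqno_map(struct xe_lrc *lrc);
 *	static inline u32 __xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc);
 *
 * i.e. an iosys_map pointing at the element within the LRC BO, and the
 * element's address in the GGTT.
 */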
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}
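
/*
 * Note that reg_nr above is a dword index into the register state area (the
 * CTX_* offsets from regs/xe_lrc_layout.h), not an MMIO offset. For example,
 * xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail) updates the dword holding
 * the ring tail in the context image.
 */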
static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-process HW status page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size, u16 msix_vec,
		       u32 init_flags)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	u32 bo_flags;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
		   XE_BO_FLAG_GGTT_INVALIDATE;
	if (vm && vm->xef) /* userspace */
		bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       bo_flags);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init the per-process HW status page and LRC / context state to
	 * known values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_device_has_msix(xe)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
				     xe_memirq_status_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
				     xe_memirq_source_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	if (init_flags & XE_LRC_CREATE_RUNALONE)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_RUN_ALONE));

	if (init_flags & XE_LRC_CREATE_PXP)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/*
	 * While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 * @msix_vec: MSI-X interrupt vector (for platforms that support it)
 * @flags: LRC initialization flags
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return: pointer to the created LRC upon success or an error pointer
 * upon failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size, u16 msix_vec, u32 flags)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}
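
/*
 * The ring size is a power of two, so the head/tail arithmetic above and the
 * wrap handling below work with a simple mask. As an illustrative example
 * (values are hypothetical): with size = SZ_4K and tail = 4088, a 16-byte
 * xe_lrc_write_ring() is split into 8 bytes at the end of the ring and
 * 8 bytes at the start, wrapping tail back around to 8.
 */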
static void __xe_lrc_write_ring(struct xe_lrc *lrc,
				struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
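 *
 * A rough usage sketch (error handling elided):
 *
 *	fence = xe_lrc_alloc_seqno_fence();
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	...
 *	xe_lrc_init_seqno_fence(lrc, fence);
 *
 * If an error occurs before initialization, release the fence with
 * xe_lrc_free_seqno_fence() instead.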
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords. We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
"truncated" : "malformed"); 1320 return numdw; 1321 1322 case MI_FORCE_WAKEUP: 1323 drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); 1324 return numdw; 1325 1326 default: 1327 drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", 1328 inst_header, opcode, numdw); 1329 return numdw; 1330 } 1331 } 1332 1333 static int dump_gfxpipe_command(struct drm_printer *p, 1334 struct xe_gt *gt, 1335 u32 *dw, 1336 int remaining_dw) 1337 { 1338 u32 numdw = instr_dw(*dw); 1339 u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); 1340 u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); 1341 u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); 1342 1343 /* 1344 * Make sure we haven't mis-parsed a number of dwords that exceeds the 1345 * remaining size of the LRC. 1346 */ 1347 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 1348 numdw = remaining_dw; 1349 1350 switch (*dw & GFXPIPE_MATCH_MASK) { 1351 #define MATCH(cmd) \ 1352 case cmd: \ 1353 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 1354 return numdw 1355 #define MATCH3D(cmd) \ 1356 case CMD_##cmd: \ 1357 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 1358 return numdw 1359 1360 MATCH(STATE_BASE_ADDRESS); 1361 MATCH(STATE_SIP); 1362 MATCH(GPGPU_CSR_BASE_ADDRESS); 1363 MATCH(STATE_COMPUTE_MODE); 1364 MATCH3D(3DSTATE_BTD); 1365 MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS); 1366 MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS); 1367 1368 MATCH3D(3DSTATE_VF_STATISTICS); 1369 1370 MATCH(PIPELINE_SELECT); 1371 1372 MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); 1373 MATCH3D(3DSTATE_CLEAR_PARAMS); 1374 MATCH3D(3DSTATE_DEPTH_BUFFER); 1375 MATCH3D(3DSTATE_STENCIL_BUFFER); 1376 MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); 1377 MATCH3D(3DSTATE_VERTEX_BUFFERS); 1378 MATCH3D(3DSTATE_VERTEX_ELEMENTS); 1379 MATCH3D(3DSTATE_INDEX_BUFFER); 1380 MATCH3D(3DSTATE_VF); 1381 MATCH3D(3DSTATE_MULTISAMPLE); 1382 MATCH3D(3DSTATE_CC_STATE_POINTERS); 1383 MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); 1384 MATCH3D(3DSTATE_VS); 1385 MATCH3D(3DSTATE_GS); 1386 MATCH3D(3DSTATE_CLIP); 1387 MATCH3D(3DSTATE_SF); 1388 MATCH3D(3DSTATE_WM); 1389 MATCH3D(3DSTATE_CONSTANT_VS); 1390 MATCH3D(3DSTATE_CONSTANT_GS); 1391 MATCH3D(3DSTATE_CONSTANT_PS); 1392 MATCH3D(3DSTATE_SAMPLE_MASK); 1393 MATCH3D(3DSTATE_CONSTANT_HS); 1394 MATCH3D(3DSTATE_CONSTANT_DS); 1395 MATCH3D(3DSTATE_HS); 1396 MATCH3D(3DSTATE_TE); 1397 MATCH3D(3DSTATE_DS); 1398 MATCH3D(3DSTATE_STREAMOUT); 1399 MATCH3D(3DSTATE_SBE); 1400 MATCH3D(3DSTATE_PS); 1401 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); 1402 MATCH3D(3DSTATE_CPS_POINTERS); 1403 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); 1404 MATCH3D(3DSTATE_BLEND_STATE_POINTERS); 1405 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); 1406 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); 1407 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); 1408 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); 1409 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); 1410 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); 1411 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); 1412 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); 1413 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); 1414 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); 1415 MATCH3D(3DSTATE_VF_INSTANCING); 1416 MATCH3D(3DSTATE_VF_SGVS); 1417 MATCH3D(3DSTATE_VF_TOPOLOGY); 1418 MATCH3D(3DSTATE_WM_CHROMAKEY); 1419 MATCH3D(3DSTATE_PS_BLEND); 1420 MATCH3D(3DSTATE_WM_DEPTH_STENCIL); 1421 MATCH3D(3DSTATE_PS_EXTRA); 1422 MATCH3D(3DSTATE_RASTER); 1423 MATCH3D(3DSTATE_SBE_SWIZ); 1424 MATCH3D(3DSTATE_WM_HZ_OP); 1425 MATCH3D(3DSTATE_VF_COMPONENT_PACKING); 1426 MATCH3D(3DSTATE_VF_SGVS_2); 
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
	MATCH3D(3DSTATE_COARSE_PIXEL);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * Wa_14019789679
	 *
	 * If the driver doesn't explicitly emit the SVG instructions while
	 * setting up the default LRC, the context switch will write 0's
	 * (noops) into the LRC memory rather than the expected instruction
	 * headers. Application contexts start out as a copy of the default
	 * LRC, and if they also do not emit specific settings for some SVG
	 * state, then on context restore they'll unintentionally inherit
	 * whatever state setting the previous context had programmed into the
	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
	 * prevent the hardware from resetting that state back to any specific
	 * value).
	 *
	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
	 * since that's a specific state setting that can easily cause GPU
	 * hangs if unintentionally inherited. However to be safe we'll
	 * continue to emit all of the SVG state since it's best not to leak
	 * any of the state between contexts, even if that leakage is harmless.
	 */
	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
	}

	if (!state_table) {
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}

struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	if (lrc->bo->vm)
		xe_vm_get(lrc->bo->vm);

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start = xe_lrc_ring_start(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct xe_vm *vm;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	vm = bo->vm;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
	if (vm)
		xe_vm_put(vm);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Ring address: 0x%08x\n",
		   snapshot->ring_addr);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
"\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE); 1768 drm_puts(p, "\t[HWCTX].data: "); 1769 for (; i < snapshot->lrc_size; i += sizeof(u32)) { 1770 u32 *val = snapshot->lrc_snapshot + i; 1771 char dumped[ASCII85_BUFSZ]; 1772 1773 drm_puts(p, ascii85_encode(*val, dumped)); 1774 } 1775 drm_puts(p, "\n"); 1776 } 1777 1778 void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) 1779 { 1780 if (!snapshot) 1781 return; 1782 1783 kvfree(snapshot->lrc_snapshot); 1784 if (snapshot->lrc_bo) { 1785 struct xe_vm *vm; 1786 1787 vm = snapshot->lrc_bo->vm; 1788 xe_bo_put(snapshot->lrc_bo); 1789 if (vm) 1790 xe_vm_put(vm); 1791 } 1792 kfree(snapshot); 1793 } 1794 1795 /** 1796 * xe_lrc_update_timestamp() - Update ctx timestamp 1797 * @lrc: Pointer to the lrc. 1798 * @old_ts: Old timestamp value 1799 * 1800 * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and 1801 * update saved value. 1802 * 1803 * Returns: New ctx timestamp value 1804 */ 1805 u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) 1806 { 1807 *old_ts = lrc->ctx_timestamp; 1808 1809 lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); 1810 1811 trace_xe_lrc_update_timestamp(lrc, *old_ts); 1812 1813 return lrc->ctx_timestamp; 1814 } 1815 1816 /** 1817 * xe_lrc_ring_is_idle() - LRC is idle 1818 * @lrc: Pointer to the lrc. 1819 * 1820 * Compare LRC ring head and tail to determine if idle. 1821 * 1822 * Return: True is ring is idle, False otherwise 1823 */ 1824 bool xe_lrc_ring_is_idle(struct xe_lrc *lrc) 1825 { 1826 return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc); 1827 } 1828