// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
#include "xe_wa.h"

/* Fields of the 64-bit LRC descriptor (see xe_lrc_descriptor()). */
#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

/* Engine class/instance fields, only used on pre-12.50 platforms (see xe_lrc_init()). */
#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

/* The optional indirect ring state occupies one page at the very end of the LRC. */
#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

/* Resolve the owning device via the fence context's GT back-pointer. */
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

/**
 * xe_gt_lrc_size() - Size of the HW context image for an engine class
 * @gt: the &xe_gt the context will run on
 * @class: engine class of the context
 *
 * Returns: context image size in bytes for @class on @gt, including the
 * trailing indirect ring state page on platforms that use one.
 */
size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size = 4 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			size = 3 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size = 2 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - number of NOPs are set in lower bits
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, use additional bytes, to
 *      follow, for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			/* Accumulate 7 bits per byte until the continuation bit clears */
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/* Write register address only; value dword is left for the HW/caller */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

/* Context-image register layout for Gen12 non-render (xCS) engines */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

/* Context-image register layout for DG2 non-render (xCS) engines */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

/* Context-image register layout for the Gen12 render engine */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

/* Context-image register layout for the XeHP render engine */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

/* Context-image register layout for the DG2 render engine */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

/* Context-image register layout for the MTL render engine */
static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

/* Layout prefix shared by all Xe2 engine classes */
#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

/* Context-image register layout for the Xe2 render engine */
static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	0
};

/* Context-image register layout for the Xe2 copy (blitter) engines */
static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};

/* Context-image register layout for all other Xe2 engines */
static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

/* Layout of the separate Xe2 indirect ring state page */
static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(5, POSTED),		/* [0x01] */
	REG(0x034),		/* [0x02] RING_BUFFER_HEAD */
	REG(0x030),		/* [0x04] RING_BUFFER_TAIL */
	REG(0x038),		/* [0x06] RING_BUFFER_START */
	REG(0x048),		/* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),			/* [0x0c] */
	LRI(9, POSTED),		/* [0x11] */
	REG(0x168),		/* [0x12] BB_ADDR_UDW */
	REG(0x140),		/* [0x14] BB_ADDR */
	REG(0x110),		/* [0x16] BB_STATE */
	REG16(0x588),		/* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x22] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x24] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x26] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x28] BB_STACK_WRITE_PORT */

	NOP(12),		/* [0x00] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

/* Pick the offset-encoding table matching @xe's platform and @class. */
static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

/* Program CTX_CONTEXT_CONTROL defaults into the context image @regs. */
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

/*
 * Program memory-based interrupt delivery commands into the context image.
 * No-op unless the device uses memirq (e.g. when running as a SR-IOV VF).
 */
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);
	u8 num_regs;

	if (!xe_device_uses_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	/* MSI-X platforms additionally load the CS interrupt vector register */
	num_regs = xe_device_has_msix(xe) ? 3 : 2;
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);

	if (xe_device_has_msix(xe)) {
		regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
		/* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
	}
}

/* Dword index of RING_MI_MODE within the context image for this platform. */
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

/* Clear STOP_RING in the context image (masked-write: set bit in mask half). */
static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

/* The ring is the first region in the LRC BO. */
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

/* The per-process HW status page immediately follows the ring. */
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

/* Driver-defined slots inside the PPHWSP (byte offsets from its start) */
#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K

/* The context register state follows the one-page PPHWSP. */
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* The job timestamp is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

/*
 * Generate __xe_lrc_<elem>_map() (iosys_map into the BO) and
 * __xe_lrc_<elem>_ggtt_addr() helpers from the offset functions above.
 */
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp job GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp job value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/* The GGTT address of the PPHWSP doubles as the context's GGTT address. */
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

/* Read dword @reg_nr of the indirect ring state page. */
static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

/* Write dword @reg_nr of the indirect ring state page. */
static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

/* Read dword @reg_nr of the context register state. */
u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

/* Write dword @reg_nr of the context register state. */
void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

/*
 * Build a default context image for @hwe's class from scratch.
 * Returns a kzalloc'd buffer of xe_gt_lrc_size() bytes (caller frees),
 * or NULL on allocation failure.
 */
static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process of HW status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

/* Point the context's page-table registers at @vm's PPGTT root. */
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

/* Tear down an initialized LRC: fence context, then unpin and release the BO. */
static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

/* Dword indices of PVC-specific context registers (value dword follows address) */
#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

/*
 * Initialize @lrc: allocate and pin the backing BO (ring + context image),
 * populate the context image (from the GT's default LRC when available,
 * otherwise built from scratch), program ring registers, and compute the
 * context descriptor. Returns 0 or a negative error code.
 */
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size, u16 msix_vec,
		       u32 init_flags)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	/* No default image for this class yet: build one from scratch */
	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_device_has_msix(xe)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
				     xe_memirq_status_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
				     xe_memirq_source_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
	}

	/* Ring registers live either in the indirect ring state page or inline */
	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	if (init_flags & XE_LRC_CREATE_RUNALONE)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_RUN_ALONE));

	if (init_flags & XE_LRC_CREATE_PXP)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	/* Start the ring with arbitration enabled */
	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	/* Seed both seqno slots to "last completed" so nothing appears pending */
	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 * @msix_vec: MSI-X interrupt vector (for platforms that support it)
 * @flags: LRC initialization flags
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return pointer to created LRC upon success and an error pointer
 * upon failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size, u16 msix_vec, u32 flags)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

/* Free bytes in the ring; size is a power of two, one slot is kept unused. */
u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

/* Copy @size bytes at the current tail and advance it (wraps via mask). */
static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

/*
 * Emit @size bytes (dword-aligned) into the ring, splitting the copy at the
 * ring wrap point and padding with one MI_NOOP to keep the tail qword-aligned.
 */
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

/* Full 64-bit context descriptor: flag bits plus the context's GGTT address. */
u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

/* Read the current seqno from the PPHWSP slot. */
s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

/* Read the start seqno from the PPHWSP slot. */
s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

/* Decode the dword length of an instruction from its header. */
static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

/*
 * Pretty-print one MI_* command at @dw. Returns the number of dwords
 * consumed (capped at @remaining_dw) so the caller can advance its cursor.
 */
static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		/* Collapse a run of consecutive NOOPs into one line */
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords. We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
MATCH3D(3DSTATE_GS); 1379 MATCH3D(3DSTATE_CLIP); 1380 MATCH3D(3DSTATE_SF); 1381 MATCH3D(3DSTATE_WM); 1382 MATCH3D(3DSTATE_CONSTANT_VS); 1383 MATCH3D(3DSTATE_CONSTANT_GS); 1384 MATCH3D(3DSTATE_CONSTANT_PS); 1385 MATCH3D(3DSTATE_SAMPLE_MASK); 1386 MATCH3D(3DSTATE_CONSTANT_HS); 1387 MATCH3D(3DSTATE_CONSTANT_DS); 1388 MATCH3D(3DSTATE_HS); 1389 MATCH3D(3DSTATE_TE); 1390 MATCH3D(3DSTATE_DS); 1391 MATCH3D(3DSTATE_STREAMOUT); 1392 MATCH3D(3DSTATE_SBE); 1393 MATCH3D(3DSTATE_PS); 1394 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); 1395 MATCH3D(3DSTATE_CPS_POINTERS); 1396 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); 1397 MATCH3D(3DSTATE_BLEND_STATE_POINTERS); 1398 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); 1399 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); 1400 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); 1401 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); 1402 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); 1403 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); 1404 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); 1405 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); 1406 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); 1407 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); 1408 MATCH3D(3DSTATE_VF_INSTANCING); 1409 MATCH3D(3DSTATE_VF_SGVS); 1410 MATCH3D(3DSTATE_VF_TOPOLOGY); 1411 MATCH3D(3DSTATE_WM_CHROMAKEY); 1412 MATCH3D(3DSTATE_PS_BLEND); 1413 MATCH3D(3DSTATE_WM_DEPTH_STENCIL); 1414 MATCH3D(3DSTATE_PS_EXTRA); 1415 MATCH3D(3DSTATE_RASTER); 1416 MATCH3D(3DSTATE_SBE_SWIZ); 1417 MATCH3D(3DSTATE_WM_HZ_OP); 1418 MATCH3D(3DSTATE_VF_COMPONENT_PACKING); 1419 MATCH3D(3DSTATE_VF_SGVS_2); 1420 MATCH3D(3DSTATE_VFG); 1421 MATCH3D(3DSTATE_URB_ALLOC_VS); 1422 MATCH3D(3DSTATE_URB_ALLOC_HS); 1423 MATCH3D(3DSTATE_URB_ALLOC_DS); 1424 MATCH3D(3DSTATE_URB_ALLOC_GS); 1425 MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); 1426 MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); 1427 MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); 1428 MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); 1429 MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); 1430 MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); 1431 
MATCH3D(3DSTATE_AMFS); 1432 MATCH3D(3DSTATE_DEPTH_BOUNDS); 1433 MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); 1434 MATCH3D(3DSTATE_CONSTANT_TS_POINTER); 1435 MATCH3D(3DSTATE_MESH_CONTROL); 1436 MATCH3D(3DSTATE_MESH_DISTRIB); 1437 MATCH3D(3DSTATE_TASK_REDISTRIB); 1438 MATCH3D(3DSTATE_MESH_SHADER); 1439 MATCH3D(3DSTATE_MESH_SHADER_DATA); 1440 MATCH3D(3DSTATE_TASK_CONTROL); 1441 MATCH3D(3DSTATE_TASK_SHADER); 1442 MATCH3D(3DSTATE_TASK_SHADER_DATA); 1443 MATCH3D(3DSTATE_URB_ALLOC_MESH); 1444 MATCH3D(3DSTATE_URB_ALLOC_TASK); 1445 MATCH3D(3DSTATE_CLIP_MESH); 1446 MATCH3D(3DSTATE_SBE_MESH); 1447 MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); 1448 1449 MATCH3D(3DSTATE_DRAWING_RECTANGLE); 1450 MATCH3D(3DSTATE_CHROMA_KEY); 1451 MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); 1452 MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); 1453 MATCH3D(3DSTATE_LINE_STIPPLE); 1454 MATCH3D(3DSTATE_AA_LINE_PARAMETERS); 1455 MATCH3D(3DSTATE_MONOFILTER_SIZE); 1456 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); 1457 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); 1458 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); 1459 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); 1460 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); 1461 MATCH3D(3DSTATE_SO_DECL_LIST); 1462 MATCH3D(3DSTATE_SO_BUFFER); 1463 MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); 1464 MATCH3D(3DSTATE_SAMPLE_PATTERN); 1465 MATCH3D(3DSTATE_3D_MODE); 1466 MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); 1467 MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); 1468 MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); 1469 1470 default: 1471 drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", 1472 *dw, pipeline, opcode, subopcode, numdw); 1473 return numdw; 1474 } 1475 } 1476 1477 static int dump_gfx_state_command(struct drm_printer *p, 1478 struct xe_gt *gt, 1479 u32 *dw, 1480 int remaining_dw) 1481 { 1482 u32 numdw = instr_dw(*dw); 1483 u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw); 1484 1485 /* 1486 * Make sure we haven't mis-parsed a number of dwords that exceeds the 1487 * 
remaining size of the LRC. 1488 */ 1489 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 1490 numdw = remaining_dw; 1491 1492 switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) { 1493 MATCH(STATE_WRITE_INLINE); 1494 1495 default: 1496 drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", 1497 *dw, opcode, numdw); 1498 return numdw; 1499 } 1500 } 1501 1502 void xe_lrc_dump_default(struct drm_printer *p, 1503 struct xe_gt *gt, 1504 enum xe_engine_class hwe_class) 1505 { 1506 u32 *dw; 1507 int remaining_dw, num_dw; 1508 1509 if (!gt->default_lrc[hwe_class]) { 1510 drm_printf(p, "No default LRC for class %d\n", hwe_class); 1511 return; 1512 } 1513 1514 /* 1515 * Skip the beginning of the LRC since it contains the per-process 1516 * hardware status page. 1517 */ 1518 dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; 1519 remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; 1520 1521 while (remaining_dw > 0) { 1522 if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { 1523 num_dw = dump_mi_command(p, gt, dw, remaining_dw); 1524 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { 1525 num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); 1526 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) { 1527 num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw); 1528 } else { 1529 num_dw = min(instr_dw(*dw), remaining_dw); 1530 drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", 1531 *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), 1532 num_dw); 1533 } 1534 1535 dw += num_dw; 1536 remaining_dw -= num_dw; 1537 } 1538 } 1539 1540 struct instr_state { 1541 u32 instr; 1542 u16 num_dw; 1543 }; 1544 1545 static const struct instr_state xe_hpg_svg_state[] = { 1546 { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, 1547 { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, 1548 { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, 1549 { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, 1550 { .instr = 
CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, 1551 { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, 1552 { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, 1553 { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, 1554 { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, 1555 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, 1556 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, 1557 { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, 1558 { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, 1559 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, 1560 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, 1561 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, 1562 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, 1563 { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, 1564 { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, 1565 { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 }, 1566 { .instr = CMD_3DSTATE_SF, .num_dw = 4 }, 1567 { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, 1568 { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, 1569 { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, 1570 { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, 1571 { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, 1572 { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, 1573 { .instr = CMD_3DSTATE_HS, .num_dw = 9 }, 1574 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, 1575 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, 1576 { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, 1577 { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, 1578 { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, 1579 { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, 1580 { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, 1581 { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, 1582 { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, 1583 { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, 1584 { .instr = 
CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, 1585 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, 1586 { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, 1587 { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, 1588 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, 1589 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 }, 1590 { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 }, 1591 { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 }, 1592 { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 }, 1593 { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 }, 1594 { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 }, 1595 { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, 1596 }; 1597 1598 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) 1599 { 1600 struct xe_gt *gt = q->hwe->gt; 1601 struct xe_device *xe = gt_to_xe(gt); 1602 const struct instr_state *state_table = NULL; 1603 int state_table_size = 0; 1604 1605 /* 1606 * Wa_14019789679 1607 * 1608 * If the driver doesn't explicitly emit the SVG instructions while 1609 * setting up the default LRC, the context switch will write 0's 1610 * (noops) into the LRC memory rather than the expected instruction 1611 * headers. Application contexts start out as a copy of the default 1612 * LRC, and if they also do not emit specific settings for some SVG 1613 * state, then on context restore they'll unintentionally inherit 1614 * whatever state setting the previous context had programmed into the 1615 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will 1616 * prevent the hardware from resetting that state back to any specific 1617 * value). 1618 * 1619 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL 1620 * since that's a specific state setting that can easily cause GPU 1621 * hangs if unintentionally inherited. 
However to be safe we'll 1622 * continue to emit all of the SVG state since it's best not to leak 1623 * any of the state between contexts, even if that leakage is harmless. 1624 */ 1625 if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { 1626 state_table = xe_hpg_svg_state; 1627 state_table_size = ARRAY_SIZE(xe_hpg_svg_state); 1628 } 1629 1630 if (!state_table) { 1631 xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", 1632 GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); 1633 return; 1634 } 1635 1636 for (int i = 0; i < state_table_size; i++) { 1637 u32 instr = state_table[i].instr; 1638 u16 num_dw = state_table[i].num_dw; 1639 bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW); 1640 1641 xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE); 1642 xe_gt_assert(gt, num_dw != 0); 1643 xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); 1644 1645 /* 1646 * Xe2's SVG context is the same as the one on DG2 / MTL 1647 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has 1648 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined). 1649 * Just make the replacement here rather than defining a 1650 * whole separate table for the single trivial change. 
1651 */ 1652 if (GRAPHICS_VER(xe) >= 20 && 1653 instr == CMD_3DSTATE_DRAWING_RECTANGLE) 1654 instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; 1655 1656 bb->cs[bb->len] = instr; 1657 if (!is_single_dw) 1658 bb->cs[bb->len] |= (num_dw - 2); 1659 1660 bb->len += num_dw; 1661 } 1662 } 1663 1664 struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) 1665 { 1666 struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT); 1667 1668 if (!snapshot) 1669 return NULL; 1670 1671 if (lrc->bo->vm) 1672 xe_vm_get(lrc->bo->vm); 1673 1674 snapshot->context_desc = xe_lrc_ggtt_addr(lrc); 1675 snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc); 1676 snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); 1677 snapshot->head = xe_lrc_ring_head(lrc); 1678 snapshot->tail.internal = lrc->ring.tail; 1679 snapshot->tail.memory = xe_lrc_ring_tail(lrc); 1680 snapshot->start = xe_lrc_ring_start(lrc); 1681 snapshot->start_seqno = xe_lrc_start_seqno(lrc); 1682 snapshot->seqno = xe_lrc_seqno(lrc); 1683 snapshot->lrc_bo = xe_bo_get(lrc->bo); 1684 snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); 1685 snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; 1686 snapshot->lrc_snapshot = NULL; 1687 snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); 1688 snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); 1689 return snapshot; 1690 } 1691 1692 void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) 1693 { 1694 struct xe_bo *bo; 1695 struct xe_vm *vm; 1696 struct iosys_map src; 1697 1698 if (!snapshot) 1699 return; 1700 1701 bo = snapshot->lrc_bo; 1702 vm = bo->vm; 1703 snapshot->lrc_bo = NULL; 1704 1705 snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); 1706 if (!snapshot->lrc_snapshot) 1707 goto put_bo; 1708 1709 xe_bo_lock(bo, false); 1710 if (!ttm_bo_vmap(&bo->ttm, &src)) { 1711 xe_map_memcpy_from(xe_bo_device(bo), 1712 snapshot->lrc_snapshot, &src, snapshot->lrc_offset, 1713 snapshot->lrc_size); 1714 
ttm_bo_vunmap(&bo->ttm, &src); 1715 } else { 1716 kvfree(snapshot->lrc_snapshot); 1717 snapshot->lrc_snapshot = NULL; 1718 } 1719 xe_bo_unlock(bo); 1720 put_bo: 1721 xe_bo_put(bo); 1722 if (vm) 1723 xe_vm_put(vm); 1724 } 1725 1726 void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) 1727 { 1728 unsigned long i; 1729 1730 if (!snapshot) 1731 return; 1732 1733 drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); 1734 drm_printf(p, "\tHW Ring address: 0x%08x\n", 1735 snapshot->ring_addr); 1736 drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", 1737 snapshot->indirect_context_desc); 1738 drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); 1739 drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", 1740 snapshot->tail.internal, snapshot->tail.memory); 1741 drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start); 1742 drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); 1743 drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); 1744 drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); 1745 drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp); 1746 1747 if (!snapshot->lrc_snapshot) 1748 return; 1749 1750 drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE); 1751 drm_puts(p, "\t[HWSP].data: "); 1752 for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) { 1753 u32 *val = snapshot->lrc_snapshot + i; 1754 char dumped[ASCII85_BUFSZ]; 1755 1756 drm_puts(p, ascii85_encode(*val, dumped)); 1757 } 1758 1759 drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE); 1760 drm_puts(p, "\t[HWCTX].data: "); 1761 for (; i < snapshot->lrc_size; i += sizeof(u32)) { 1762 u32 *val = snapshot->lrc_snapshot + i; 1763 char dumped[ASCII85_BUFSZ]; 1764 1765 drm_puts(p, ascii85_encode(*val, dumped)); 1766 } 1767 drm_puts(p, "\n"); 1768 } 1769 1770 void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) 1771 { 1772 if (!snapshot) 1773 return; 1774 
1775 kvfree(snapshot->lrc_snapshot); 1776 if (snapshot->lrc_bo) { 1777 struct xe_vm *vm; 1778 1779 vm = snapshot->lrc_bo->vm; 1780 xe_bo_put(snapshot->lrc_bo); 1781 if (vm) 1782 xe_vm_put(vm); 1783 } 1784 kfree(snapshot); 1785 } 1786 1787 /** 1788 * xe_lrc_update_timestamp() - Update ctx timestamp 1789 * @lrc: Pointer to the lrc. 1790 * @old_ts: Old timestamp value 1791 * 1792 * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and 1793 * update saved value. 1794 * 1795 * Returns: New ctx timestamp value 1796 */ 1797 u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) 1798 { 1799 *old_ts = lrc->ctx_timestamp; 1800 1801 lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); 1802 1803 trace_xe_lrc_update_timestamp(lrc, *old_ts); 1804 1805 return lrc->ctx_timestamp; 1806 } 1807 1808 /** 1809 * xe_lrc_ring_is_idle() - LRC is idle 1810 * @lrc: Pointer to the lrc. 1811 * 1812 * Compare LRC ring head and tail to determine if idle. 1813 * 1814 * Return: True is ring is idle, False otherwise 1815 */ 1816 bool xe_lrc_ring_is_idle(struct xe_lrc *lrc) 1817 { 1818 return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc); 1819 } 1820