// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <linux/ascii85.h>

#include "instructions/xe_gfx_state_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

struct xe_lrc_snapshot {
	struct xe_bo *lrc_bo;
	void *lrc_snapshot;
	unsigned long lrc_size, lrc_offset;

	u32 context_desc;
	u32 indirect_context_desc;
	u32 head;
	struct {
		u32 internal;
		u32 memory;
	} tail;
	u32 start_seqno;
	u32 seqno;
	u32 ctx_timestamp;
	u32 ctx_job_timestamp;
};

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size = 4 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			size = 3 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size = 2 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and write the
 * commands and register offsets into @regs. The following encoding is used
 * for each byte. There are two steps: decoding commands and decoding
 * addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to load in the case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these follow an MI_LOAD_REGISTER_IMM command, one encoded offset
 * per register ("count" of them). They are set by using the REG/REG16 macros:
 * the former is used for offsets below 0x200 while the latter is for larger
 * offsets. Those macros already set all the bits documented below correctly:
 *
 * [7]: set when a register offset needs more than 7 bits; additional bytes
 *      follow with the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only writes the commands and register offsets. Register
 * values are not filled out.
 */
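/*
 * Worked example of the encoding above (illustrative only; the bytes and
 * dwords below are derived from the macros that follow, not taken from any
 * real platform table). The fragment
 *
 *	LRI(2, POSTED), REG(0x034), REG16(0x2b4)
 *
 * encodes to the bytes 0x42, 0x0d, 0x81, 0x2d and is decoded by
 * set_offsets() into @regs as
 *
 *	MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
 *		MI_LRI_FORCE_POSTED | MI_LRI_LRM_CS_MMIO,
 *	base + 0x034, <value>, base + 0x2b4, <value>
 *
 * with each <value> slot left for the caller to fill in later.
 */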
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x3b] */
	LRI(1, 0),		/* [0x41] */
	REG(0x0c8),		/* [0x42] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(5, POSTED),		/* [0x01] */
	REG(0x034),		/* [0x02] RING_BUFFER_HEAD */
	REG(0x030),		/* [0x04] RING_BUFFER_TAIL */
	REG(0x038),		/* [0x06] RING_BUFFER_START */
	REG(0x048),		/* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),			/* [0x0c] */
	LRI(9, POSTED),		/* [0x11] */
	REG(0x168),		/* [0x12] BB_ADDR_UDW */
	REG(0x140),		/* [0x14] BB_ADDR */
	REG(0x110),		/* [0x16] BB_STATE */
	REG16(0x588),		/* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x1a] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x1c] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x1e] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x22] BB_STACK_WRITE_PORT */

	NOP(12),		/* [0x24] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}
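/*
 * Selection example (illustrative): a render engine on GRAPHICS_VERx100
 * 1271 (Xe_LPG / MTL) picks mtl_rcs_offsets above, the same engine on
 * 1255 (Xe_HPG / DG2) picks dg2_rcs_offsets, and any copy engine prior to
 * Xe2 falls back to the shared gen12_xcs_offsets table.
 */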
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	/*
	 * MI_MODE is a masked register: the upper 16 bits select which of
	 * the lower bits to update, so clear STOP_RING while raising its
	 * mask bit.
	 */
	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	/* The ring occupies the start of the LRC buffer object */
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K
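/*
 * Resulting driver-defined PPHWSP layout (illustrative summary; byte
 * offsets within the 4K PPHWSP, derived from the defines above - the
 * first 512 bytes are left for hardware status page use):
 *
 *	0x200: seqno
 *	0x208: start seqno
 *	0x210: ctx job timestamp
 *	0x800: parallel submission scratch area
 */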
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* The job timestamp is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel scratch area is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: per-process HW status page (PPHWSP) */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init the per-process HW status page (PPHWSP) and LRC / context
	 * state to known values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/*
	 * While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return: Pointer to the created LRC on success, error pointer on failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}
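/*
 * Lifetime sketch (illustrative; assumes the xe_lrc_get()/xe_lrc_put()
 * kref helpers from xe_lrc.h):
 *
 *	struct xe_lrc *lrc = xe_lrc_create(hwe, vm, SZ_16K);
 *
 *	if (IS_ERR(lrc))
 *		return PTR_ERR(lrc);
 *	...
 *	xe_lrc_put(lrc);	- the final put ends up in xe_lrc_destroy()
 */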
/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when the reference count reaches zero; releases resources held by
 * the Logical Ring Context (LRC) and frees the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}
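/*
 * Worked example (illustrative numbers): with size = SZ_4K, head = 0 and
 * tail = 4000, the expression above yields ((0 - 4000 - 1) & 4095) + 1 =
 * 96 bytes of space; when head == tail the ring reports the full size as
 * available.
 */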
static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		/* Split the copy at the wrap point of the ring */
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		/* Pad odd-dword writes with an MI_NOOP to keep the tail qword-aligned */
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords. We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
"CS_MMIO " : "", 1283 dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); 1284 if (numdw == 4) 1285 drm_printf(p, " - %#6x = %#010llx\n", 1286 dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); 1287 else 1288 drm_printf(p, " - %*ph (%s)\n", 1289 (int)sizeof(u32) * (numdw - 1), dw + 1, 1290 numdw < 4 ? "truncated" : "malformed"); 1291 return numdw; 1292 1293 case MI_FORCE_WAKEUP: 1294 drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); 1295 return numdw; 1296 1297 default: 1298 drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", 1299 inst_header, opcode, numdw); 1300 return numdw; 1301 } 1302 } 1303 1304 static int dump_gfxpipe_command(struct drm_printer *p, 1305 struct xe_gt *gt, 1306 u32 *dw, 1307 int remaining_dw) 1308 { 1309 u32 numdw = instr_dw(*dw); 1310 u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); 1311 u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); 1312 u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); 1313 1314 /* 1315 * Make sure we haven't mis-parsed a number of dwords that exceeds the 1316 * remaining size of the LRC. 1317 */ 1318 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 1319 numdw = remaining_dw; 1320 1321 switch (*dw & GFXPIPE_MATCH_MASK) { 1322 #define MATCH(cmd) \ 1323 case cmd: \ 1324 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 1325 return numdw 1326 #define MATCH3D(cmd) \ 1327 case CMD_##cmd: \ 1328 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 1329 return numdw 1330 1331 MATCH(STATE_BASE_ADDRESS); 1332 MATCH(STATE_SIP); 1333 MATCH(GPGPU_CSR_BASE_ADDRESS); 1334 MATCH(STATE_COMPUTE_MODE); 1335 MATCH3D(3DSTATE_BTD); 1336 MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS); 1337 MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS); 1338 1339 MATCH3D(3DSTATE_VF_STATISTICS); 1340 1341 MATCH(PIPELINE_SELECT); 1342 1343 MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); 1344 MATCH3D(3DSTATE_CLEAR_PARAMS); 1345 MATCH3D(3DSTATE_DEPTH_BUFFER); 1346 MATCH3D(3DSTATE_STENCIL_BUFFER); 1347 MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); 1348 MATCH3D(3DSTATE_VERTEX_BUFFERS); 1349 MATCH3D(3DSTATE_VERTEX_ELEMENTS); 1350 MATCH3D(3DSTATE_INDEX_BUFFER); 1351 MATCH3D(3DSTATE_VF); 1352 MATCH3D(3DSTATE_MULTISAMPLE); 1353 MATCH3D(3DSTATE_CC_STATE_POINTERS); 1354 MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); 1355 MATCH3D(3DSTATE_VS); 1356 MATCH3D(3DSTATE_GS); 1357 MATCH3D(3DSTATE_CLIP); 1358 MATCH3D(3DSTATE_SF); 1359 MATCH3D(3DSTATE_WM); 1360 MATCH3D(3DSTATE_CONSTANT_VS); 1361 MATCH3D(3DSTATE_CONSTANT_GS); 1362 MATCH3D(3DSTATE_CONSTANT_PS); 1363 MATCH3D(3DSTATE_SAMPLE_MASK); 1364 MATCH3D(3DSTATE_CONSTANT_HS); 1365 MATCH3D(3DSTATE_CONSTANT_DS); 1366 MATCH3D(3DSTATE_HS); 1367 MATCH3D(3DSTATE_TE); 1368 MATCH3D(3DSTATE_DS); 1369 MATCH3D(3DSTATE_STREAMOUT); 1370 MATCH3D(3DSTATE_SBE); 1371 MATCH3D(3DSTATE_PS); 1372 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); 1373 MATCH3D(3DSTATE_CPS_POINTERS); 1374 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); 1375 MATCH3D(3DSTATE_BLEND_STATE_POINTERS); 1376 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); 1377 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); 1378 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); 1379 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); 1380 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); 1381 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); 1382 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); 1383 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); 1384 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); 1385 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); 1386 MATCH3D(3DSTATE_VF_INSTANCING); 1387 MATCH3D(3DSTATE_VF_SGVS); 1388 MATCH3D(3DSTATE_VF_TOPOLOGY); 1389 
	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_CONSTANT_PS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};
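/*
 * Encoding note (illustrative): each entry above pairs a GFXPIPE opcode
 * with its total length in dwords. xe_lrc_emit_hwe_state_instructions()
 * below emits only the header dword - e.g. CMD_3DSTATE_VS with
 * .num_dw = 9 is OR'd with a DWord Length field of 7 (num_dw - 2) - and
 * then advances bb->len past the state dwords without writing them.
 */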
void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * At the moment we only need to emit non-register state for the RCS
	 * engine.
	 */
	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	switch (GRAPHICS_VERx100(xe)) {
	case 1255:
	case 1270 ... 2004:
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
		break;
	default:
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}

struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	if (lrc->bo && lrc->bo->vm)
		xe_vm_get(lrc->bo->vm);

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct xe_vm *vm;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	vm = bo->vm;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
	if (vm)
		xe_vm_put(vm);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo) {
		struct xe_vm *vm;

		vm = snapshot->lrc_bo->vm;
		xe_bo_put(snapshot->lrc_bo);
		if (vm)
			xe_vm_put(vm);
	}
	kfree(snapshot);
}

/**
 * xe_lrc_update_timestamp() - Update ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Old timestamp value
 *
 * Populate @old_ts with the current saved ctx timestamp, read the new ctx
 * timestamp and update the saved value.
 *
 * Returns: New ctx timestamp value
 */
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
{
	*old_ts = lrc->ctx_timestamp;

	lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);

	return lrc->ctx_timestamp;
}
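/*
 * Usage sketch (illustrative): callers can accumulate per-context runtime
 * deltas across scheduling points, e.g.
 *
 *	u32 old_ts, new_ts;
 *
 *	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
 *	total_ticks += new_ts - old_ts;
 */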