// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"
#include "xe_wa.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size = 4 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			size = 3 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size = 2 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - number of NOPs are set in lower bits
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, additional bytes follow
 *      for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
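 *
 * For illustration, a hypothetical table fragment
 *
 *	NOP(2), LRI(2, POSTED), REG(0x034), REG16(0x2b4), 0
 *
 * would decode as: skip two dwords in @regs; emit one MI_LOAD_REGISTER_IMM
 * header for two registers with MI_LRI_FORCE_POSTED (and MI_LRI_LRM_CS_MMIO)
 * set; fill the two register-offset slots with (mmio_base + 0x34) and
 * (mmio_base + 0x2b4), leaving their value dwords untouched; the terminating
 * 0 ends the walk and an MI_BATCH_BUFFER_END is written at the final
 * position.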
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(5, POSTED),		/* [0x01] */
	REG(0x034),		/* [0x02] RING_BUFFER_HEAD */
	REG(0x030),		/* [0x04] RING_BUFFER_TAIL */
	REG(0x038),		/* [0x06] RING_BUFFER_START */
	REG(0x048),		/* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),			/* [0x0c] */
	LRI(9, POSTED),		/* [0x11] */
	REG(0x168),		/* [0x12] BB_ADDR_UDW */
	REG(0x140),		/* [0x14] BB_ADDR */
	REG(0x110),		/* [0x16] BB_STATE */
	REG16(0x588),		/* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x22] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x24] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x26] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x28] BB_STACK_WRITE_PORT */

	NOP(12),		/* [0x00] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!xe_device_uses_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K

u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* The ctx job timestamp is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel data is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process of HW status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) |
				     RING_VALID);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return: pointer to the created LRC upon success and an error pointer
 * upon failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
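 *
 * A minimal sketch of the expected release pattern (hypothetical caller that
 * holds a reference obtained at creation time):
 *
 *	kref_put(&lrc->refcount, xe_lrc_destroy);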
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
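 *
 * A minimal lifecycle sketch (hypothetical caller, "setup_failed" is an
 * illustrative condition, error handling elided):
 *
 *	fence = xe_lrc_alloc_seqno_fence();
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	if (setup_failed)
 *		xe_lrc_free_seqno_fence(fence);
 *	else
 *		xe_lrc_init_seqno_fence(lrc, fence);
 *
 * Once initialized, the fence is released through normal dma-fence
 * refcounting rather than through this function.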
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords. We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ?
"CS_MMIO " : "", 1268 dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); 1269 if (numdw == 4) 1270 drm_printf(p, " - %#6x = %#010llx\n", 1271 dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); 1272 else 1273 drm_printf(p, " - %*ph (%s)\n", 1274 (int)sizeof(u32) * (numdw - 1), dw + 1, 1275 numdw < 4 ? "truncated" : "malformed"); 1276 return numdw; 1277 1278 case MI_FORCE_WAKEUP: 1279 drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); 1280 return numdw; 1281 1282 default: 1283 drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", 1284 inst_header, opcode, numdw); 1285 return numdw; 1286 } 1287 } 1288 1289 static int dump_gfxpipe_command(struct drm_printer *p, 1290 struct xe_gt *gt, 1291 u32 *dw, 1292 int remaining_dw) 1293 { 1294 u32 numdw = instr_dw(*dw); 1295 u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); 1296 u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); 1297 u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); 1298 1299 /* 1300 * Make sure we haven't mis-parsed a number of dwords that exceeds the 1301 * remaining size of the LRC. 1302 */ 1303 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 1304 numdw = remaining_dw; 1305 1306 switch (*dw & GFXPIPE_MATCH_MASK) { 1307 #define MATCH(cmd) \ 1308 case cmd: \ 1309 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 1310 return numdw 1311 #define MATCH3D(cmd) \ 1312 case CMD_##cmd: \ 1313 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 1314 return numdw 1315 1316 MATCH(STATE_BASE_ADDRESS); 1317 MATCH(STATE_SIP); 1318 MATCH(GPGPU_CSR_BASE_ADDRESS); 1319 MATCH(STATE_COMPUTE_MODE); 1320 MATCH3D(3DSTATE_BTD); 1321 MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS); 1322 MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS); 1323 1324 MATCH3D(3DSTATE_VF_STATISTICS); 1325 1326 MATCH(PIPELINE_SELECT); 1327 1328 MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); 1329 MATCH3D(3DSTATE_CLEAR_PARAMS); 1330 MATCH3D(3DSTATE_DEPTH_BUFFER); 1331 MATCH3D(3DSTATE_STENCIL_BUFFER); 1332 MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); 1333 MATCH3D(3DSTATE_VERTEX_BUFFERS); 1334 MATCH3D(3DSTATE_VERTEX_ELEMENTS); 1335 MATCH3D(3DSTATE_INDEX_BUFFER); 1336 MATCH3D(3DSTATE_VF); 1337 MATCH3D(3DSTATE_MULTISAMPLE); 1338 MATCH3D(3DSTATE_CC_STATE_POINTERS); 1339 MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); 1340 MATCH3D(3DSTATE_VS); 1341 MATCH3D(3DSTATE_GS); 1342 MATCH3D(3DSTATE_CLIP); 1343 MATCH3D(3DSTATE_SF); 1344 MATCH3D(3DSTATE_WM); 1345 MATCH3D(3DSTATE_CONSTANT_VS); 1346 MATCH3D(3DSTATE_CONSTANT_GS); 1347 MATCH3D(3DSTATE_CONSTANT_PS); 1348 MATCH3D(3DSTATE_SAMPLE_MASK); 1349 MATCH3D(3DSTATE_CONSTANT_HS); 1350 MATCH3D(3DSTATE_CONSTANT_DS); 1351 MATCH3D(3DSTATE_HS); 1352 MATCH3D(3DSTATE_TE); 1353 MATCH3D(3DSTATE_DS); 1354 MATCH3D(3DSTATE_STREAMOUT); 1355 MATCH3D(3DSTATE_SBE); 1356 MATCH3D(3DSTATE_PS); 1357 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); 1358 MATCH3D(3DSTATE_CPS_POINTERS); 1359 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); 1360 MATCH3D(3DSTATE_BLEND_STATE_POINTERS); 1361 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); 1362 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); 1363 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); 1364 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); 1365 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); 1366 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); 1367 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); 1368 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); 1369 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); 1370 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); 1371 MATCH3D(3DSTATE_VF_INSTANCING); 1372 MATCH3D(3DSTATE_VF_SGVS); 1373 MATCH3D(3DSTATE_VF_TOPOLOGY); 1374 
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS,
	  .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * Wa_14019789679
	 *
	 * If the driver doesn't explicitly emit the SVG instructions while
	 * setting up the default LRC, the context switch will write 0's
	 * (noops) into the LRC memory rather than the expected instruction
	 * headers. Application contexts start out as a copy of the default
	 * LRC, and if they also do not emit specific settings for some SVG
	 * state, then on context restore they'll unintentionally inherit
	 * whatever state setting the previous context had programmed into the
	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
	 * prevent the hardware from resetting that state back to any specific
	 * value).
	 *
	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
	 * since that's a specific state setting that can easily cause GPU
	 * hangs if unintentionally inherited. However to be safe we'll
	 * continue to emit all of the SVG state since it's best not to leak
	 * any of the state between contexts, even if that leakage is harmless.
	 */
	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
	}

	if (!state_table) {
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
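		 *
		 * For illustration (assuming the batch-buffer memory starts
		 * out zeroed): a table entry like { CMD_3DSTATE_CLIP,
		 * .num_dw = 4 } is emitted below as its header dword OR'd
		 * with the dword length field (4 - 2), followed by three
		 * payload dwords left at zero, so the state is reset to a
		 * known value on context restore.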
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}

struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	if (lrc->bo->vm)
		xe_vm_get(lrc->bo->vm);

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct xe_vm *vm;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	vm = bo->vm;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
	if (vm)
		xe_vm_put(vm);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p,
			 ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo) {
		struct xe_vm *vm;

		vm = snapshot->lrc_bo->vm;
		xe_bo_put(snapshot->lrc_bo);
		if (vm)
			xe_vm_put(vm);
	}
	kfree(snapshot);
}

/**
 * xe_lrc_update_timestamp() - Update ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Old timestamp value
 *
 * Populate @old_ts with the current saved ctx timestamp, read the new ctx
 * timestamp and update the saved value.
 *
 * Returns: New ctx timestamp value
 */
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
{
	*old_ts = lrc->ctx_timestamp;

	lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);

	return lrc->ctx_timestamp;
}
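
/*
 * Example usage of xe_lrc_update_timestamp() (hypothetical caller): the
 * difference between the returned value and *old_ts is the GPU time
 * accumulated by this context since the previous sample, modulo u32
 * wraparound.
 *
 *	u32 old_ts, new_ts;
 *
 *	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
 *	total_runtime += new_ts - old_ts;
 */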