// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"
#include "xe_wa.h"

/* Context descriptor fields (see xe_lrc_descriptor() below) */
#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

/* Pre-Xe_HP (< 12.50) descriptors also encode engine class/instance */
#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

/*
 * Captured state of an LRC, taken for error/debug dumps. The field
 * meanings mirror the live struct xe_lrc contents at capture time.
 */
struct xe_lrc_snapshot {
	struct xe_bo *lrc_bo;			/* reference to the LRC backing BO */
	void *lrc_snapshot;			/* CPU copy of the LRC contents */
	unsigned long lrc_size, lrc_offset;

	u32 context_desc;
	u32 indirect_context_desc;
	u32 head;
	struct {
		u32 internal;			/* tail tracked by the driver */
		u32 memory;			/* tail as stored in context memory */
	} tail;
	u32 start_seqno;
	u32 seqno;
	u32 ctx_timestamp;
	u32 ctx_job_timestamp;
};

/* Resolve the owning xe_device via the fence context's GT back-pointer. */
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

/*
 * xe_gt_lrc_size() - Size of the HW context image for an engine class.
 *
 * Returns the per-platform context-state size in bytes, including the
 * trailing indirect ring state page on platforms that have one. The
 * per-class page counts come from hardware documentation.
 */
size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size = 4 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			size = 3 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size = 2 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - number of NOPs are set in lower bits
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, use additional bytes, to
 *      follow, for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			/* Accumulate a variable-length (7 bits per byte) offset */
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/* Offsets were encoded >> 2; restore byte addressing */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	/* Terminate the context image */
	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),			/* [0x00] */
	LRI(5, POSTED),		/* [0x01] */
	REG(0x034),		/* [0x02] RING_BUFFER_HEAD */
	REG(0x030),		/* [0x04] RING_BUFFER_TAIL */
	REG(0x038),		/* [0x06] RING_BUFFER_START */
	REG(0x048),		/* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),			/* [0x0c] */
	LRI(9, POSTED),		/* [0x11] */
	REG(0x168),		/* [0x12] BB_ADDR_UDW */
	REG(0x140),		/* [0x14] BB_ADDR */
	REG(0x110),		/* [0x16] BB_STATE */
	REG16(0x588),		/* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x22] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x24] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x26] BB_STACK_WRITE_PORT */
	REG16(0x588),		/* [0x28] BB_STACK_WRITE_PORT */

	NOP(12),		/* [0x00] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

/*
 * Select the register-offset table matching the platform/engine class.
 * Falls back to the gen12 tables for anything older than the listed IPs.
 */
static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

/* Program CTX_CONTEXT_CONTROL in the default context image. */
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

/*
 * For SR-IOV VFs with memory-based interrupts, patch the context image so
 * the engine loads its interrupt mask from, and reports interrupt status
 * to, the per-VF memirq pages. No-op on other configurations.
 */
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}

/* Dword index of the RING_MI_MODE register pair within the context image. */
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
	}

/*
 * Clear the STOP_RING bit in the context image's MI_MODE value (masked-bit
 * register: the high 16 bits select which bits are written).
 */
static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

/* Ring buffer lives at the very start of the LRC BO. */
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

/* PPHWSP immediately follows the ring buffer. */
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

/* Driver-defined slots within the PPHWSP page (byte offsets) */
#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K

/* Context register state immediately follows the PPHWSP page. */
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

/* Size of the register-state header skipped by context image copies. */
static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* The job timestamp is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

/*
 * For each LRC sub-region "elem" with a __xe_lrc_##elem##_offset() helper
 * above, generate:
 *   __xe_lrc_##elem##_map()       - iosys_map pointing at the region
 *   __xe_lrc_##elem##_ggtt_addr() - GGTT address of the region
 */
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp job GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp job value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/* GGTT address the HW uses for this context (PPHWSP base). */
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

/* Read one dword from the indirect ring state page. */
static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

/* Write one dword to the indirect ring state page. */
static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

/* Read one dword from the context register state. */
u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

/* Write one dword to the context register state. */
void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

/*
 * Build a fresh default context image for @hwe in kernel memory.
 * Caller owns the returned kzalloc'd buffer (free with kfree); returns
 * NULL on allocation failure.
 */
static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process of HW status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

/* Point the context's PDP0 at @vm's top-level page table. */
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

/* Tear down fence context and release the (pinned) backing BO. */
static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

/*
 * xe_lrc_init() - Populate an already-allocated struct xe_lrc.
 *
 * Allocates and pins the backing BO (ring + context image [+ indirect ring
 * state]), seeds the context image from the GT's default LRC (or builds a
 * fresh one if no default exists yet for this class), wires up the ring
 * registers, PPGTT, descriptor and seqno slots.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return pointer to created LRC upon success and an error pointer
 * upon failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

/* Tail lives in the indirect ring state page when the platform has one. */
void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

/*
 * Free space in the ring, in bytes. Ring size must be a power of two for
 * the mask arithmetic below; one slot is always left unused so that
 * head == tail unambiguously means "empty".
 */
u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

/* Copy @size bytes at the current tail and advance it (with wrap). */
static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

/*
 * xe_lrc_write_ring() - Emit @size bytes of commands into the ring.
 *
 * @size must be dword-aligned; a MI_NOOP is appended when needed to keep
 * the tail qword-aligned. Handles the wrap-around split transparently.
 * The caller is responsible for ensuring there is enough ring space.
 */
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

/* Full context descriptor: flag bits | GGTT address of the context. */
u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

/* Current seqno, as written to the PPHWSP slot by the engine. */
s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

/* Seqno recorded at the start of the most recent job. */
s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

/*
 * Decode the dword length of a GPU instruction from its header.
 * Used by the LRC dump parsers below; lengths follow the command
 * encodings in the hardware command reference.
 */
static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

/*
 * Pretty-print one MI_* command starting at @dw into @p.
 * Returns the number of dwords consumed (never more than @remaining_dw).
 */
static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		/* Collapse runs of NOOPs into a single output line */
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

/*
 * Pretty-print one GFXPIPE command starting at @dw into @p.
 * Returns the number of dwords consumed (never more than @remaining_dw).
 */
static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
MATCH3D(3DSTATE_GS); 1360 MATCH3D(3DSTATE_CLIP); 1361 MATCH3D(3DSTATE_SF); 1362 MATCH3D(3DSTATE_WM); 1363 MATCH3D(3DSTATE_CONSTANT_VS); 1364 MATCH3D(3DSTATE_CONSTANT_GS); 1365 MATCH3D(3DSTATE_CONSTANT_PS); 1366 MATCH3D(3DSTATE_SAMPLE_MASK); 1367 MATCH3D(3DSTATE_CONSTANT_HS); 1368 MATCH3D(3DSTATE_CONSTANT_DS); 1369 MATCH3D(3DSTATE_HS); 1370 MATCH3D(3DSTATE_TE); 1371 MATCH3D(3DSTATE_DS); 1372 MATCH3D(3DSTATE_STREAMOUT); 1373 MATCH3D(3DSTATE_SBE); 1374 MATCH3D(3DSTATE_PS); 1375 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); 1376 MATCH3D(3DSTATE_CPS_POINTERS); 1377 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); 1378 MATCH3D(3DSTATE_BLEND_STATE_POINTERS); 1379 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); 1380 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); 1381 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); 1382 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); 1383 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); 1384 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); 1385 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); 1386 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); 1387 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); 1388 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); 1389 MATCH3D(3DSTATE_VF_INSTANCING); 1390 MATCH3D(3DSTATE_VF_SGVS); 1391 MATCH3D(3DSTATE_VF_TOPOLOGY); 1392 MATCH3D(3DSTATE_WM_CHROMAKEY); 1393 MATCH3D(3DSTATE_PS_BLEND); 1394 MATCH3D(3DSTATE_WM_DEPTH_STENCIL); 1395 MATCH3D(3DSTATE_PS_EXTRA); 1396 MATCH3D(3DSTATE_RASTER); 1397 MATCH3D(3DSTATE_SBE_SWIZ); 1398 MATCH3D(3DSTATE_WM_HZ_OP); 1399 MATCH3D(3DSTATE_VF_COMPONENT_PACKING); 1400 MATCH3D(3DSTATE_VF_SGVS_2); 1401 MATCH3D(3DSTATE_VFG); 1402 MATCH3D(3DSTATE_URB_ALLOC_VS); 1403 MATCH3D(3DSTATE_URB_ALLOC_HS); 1404 MATCH3D(3DSTATE_URB_ALLOC_DS); 1405 MATCH3D(3DSTATE_URB_ALLOC_GS); 1406 MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); 1407 MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); 1408 MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); 1409 MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); 1410 MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); 1411 MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); 1412 
MATCH3D(3DSTATE_AMFS); 1413 MATCH3D(3DSTATE_DEPTH_BOUNDS); 1414 MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); 1415 MATCH3D(3DSTATE_CONSTANT_TS_POINTER); 1416 MATCH3D(3DSTATE_MESH_CONTROL); 1417 MATCH3D(3DSTATE_MESH_DISTRIB); 1418 MATCH3D(3DSTATE_TASK_REDISTRIB); 1419 MATCH3D(3DSTATE_MESH_SHADER); 1420 MATCH3D(3DSTATE_MESH_SHADER_DATA); 1421 MATCH3D(3DSTATE_TASK_CONTROL); 1422 MATCH3D(3DSTATE_TASK_SHADER); 1423 MATCH3D(3DSTATE_TASK_SHADER_DATA); 1424 MATCH3D(3DSTATE_URB_ALLOC_MESH); 1425 MATCH3D(3DSTATE_URB_ALLOC_TASK); 1426 MATCH3D(3DSTATE_CLIP_MESH); 1427 MATCH3D(3DSTATE_SBE_MESH); 1428 MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); 1429 1430 MATCH3D(3DSTATE_DRAWING_RECTANGLE); 1431 MATCH3D(3DSTATE_CHROMA_KEY); 1432 MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); 1433 MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); 1434 MATCH3D(3DSTATE_LINE_STIPPLE); 1435 MATCH3D(3DSTATE_AA_LINE_PARAMETERS); 1436 MATCH3D(3DSTATE_MONOFILTER_SIZE); 1437 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); 1438 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); 1439 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); 1440 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); 1441 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); 1442 MATCH3D(3DSTATE_SO_DECL_LIST); 1443 MATCH3D(3DSTATE_SO_BUFFER); 1444 MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); 1445 MATCH3D(3DSTATE_SAMPLE_PATTERN); 1446 MATCH3D(3DSTATE_3D_MODE); 1447 MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); 1448 MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); 1449 MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); 1450 1451 default: 1452 drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", 1453 *dw, pipeline, opcode, subopcode, numdw); 1454 return numdw; 1455 } 1456 } 1457 1458 static int dump_gfx_state_command(struct drm_printer *p, 1459 struct xe_gt *gt, 1460 u32 *dw, 1461 int remaining_dw) 1462 { 1463 u32 numdw = instr_dw(*dw); 1464 u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw); 1465 1466 /* 1467 * Make sure we haven't mis-parsed a number of dwords that exceeds the 1468 * 
remaining size of the LRC. 1469 */ 1470 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 1471 numdw = remaining_dw; 1472 1473 switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) { 1474 MATCH(STATE_WRITE_INLINE); 1475 1476 default: 1477 drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", 1478 *dw, opcode, numdw); 1479 return numdw; 1480 } 1481 } 1482 1483 void xe_lrc_dump_default(struct drm_printer *p, 1484 struct xe_gt *gt, 1485 enum xe_engine_class hwe_class) 1486 { 1487 u32 *dw; 1488 int remaining_dw, num_dw; 1489 1490 if (!gt->default_lrc[hwe_class]) { 1491 drm_printf(p, "No default LRC for class %d\n", hwe_class); 1492 return; 1493 } 1494 1495 /* 1496 * Skip the beginning of the LRC since it contains the per-process 1497 * hardware status page. 1498 */ 1499 dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; 1500 remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; 1501 1502 while (remaining_dw > 0) { 1503 if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { 1504 num_dw = dump_mi_command(p, gt, dw, remaining_dw); 1505 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { 1506 num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); 1507 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) { 1508 num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw); 1509 } else { 1510 num_dw = min(instr_dw(*dw), remaining_dw); 1511 drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", 1512 *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), 1513 num_dw); 1514 } 1515 1516 dw += num_dw; 1517 remaining_dw -= num_dw; 1518 } 1519 } 1520 1521 struct instr_state { 1522 u32 instr; 1523 u16 num_dw; 1524 }; 1525 1526 static const struct instr_state xe_hpg_svg_state[] = { 1527 { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, 1528 { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, 1529 { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, 1530 { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, 1531 { .instr = 
CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, 1532 { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, 1533 { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, 1534 { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, 1535 { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, 1536 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, 1537 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, 1538 { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, 1539 { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, 1540 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, 1541 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, 1542 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, 1543 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, 1544 { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, 1545 { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, 1546 { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 }, 1547 { .instr = CMD_3DSTATE_SF, .num_dw = 4 }, 1548 { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, 1549 { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, 1550 { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, 1551 { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, 1552 { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, 1553 { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, 1554 { .instr = CMD_3DSTATE_HS, .num_dw = 9 }, 1555 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, 1556 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, 1557 { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, 1558 { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, 1559 { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, 1560 { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, 1561 { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, 1562 { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, 1563 { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, 1564 { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, 1565 { .instr = 
CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, 1566 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, 1567 { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, 1568 { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, 1569 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, 1570 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 }, 1571 { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 }, 1572 { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 }, 1573 { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 }, 1574 { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 }, 1575 { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 }, 1576 { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, 1577 }; 1578 1579 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) 1580 { 1581 struct xe_gt *gt = q->hwe->gt; 1582 struct xe_device *xe = gt_to_xe(gt); 1583 const struct instr_state *state_table = NULL; 1584 int state_table_size = 0; 1585 1586 /* 1587 * Wa_14019789679 1588 * 1589 * If the driver doesn't explicitly emit the SVG instructions while 1590 * setting up the default LRC, the context switch will write 0's 1591 * (noops) into the LRC memory rather than the expected instruction 1592 * headers. Application contexts start out as a copy of the default 1593 * LRC, and if they also do not emit specific settings for some SVG 1594 * state, then on context restore they'll unintentionally inherit 1595 * whatever state setting the previous context had programmed into the 1596 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will 1597 * prevent the hardware from resetting that state back to any specific 1598 * value). 1599 * 1600 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL 1601 * since that's a specific state setting that can easily cause GPU 1602 * hangs if unintentionally inherited. 
However to be safe we'll 1603 * continue to emit all of the SVG state since it's best not to leak 1604 * any of the state between contexts, even if that leakage is harmless. 1605 */ 1606 if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { 1607 state_table = xe_hpg_svg_state; 1608 state_table_size = ARRAY_SIZE(xe_hpg_svg_state); 1609 } 1610 1611 if (!state_table) { 1612 xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", 1613 GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); 1614 return; 1615 } 1616 1617 for (int i = 0; i < state_table_size; i++) { 1618 u32 instr = state_table[i].instr; 1619 u16 num_dw = state_table[i].num_dw; 1620 bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW); 1621 1622 xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE); 1623 xe_gt_assert(gt, num_dw != 0); 1624 xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); 1625 1626 /* 1627 * Xe2's SVG context is the same as the one on DG2 / MTL 1628 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has 1629 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined). 1630 * Just make the replacement here rather than defining a 1631 * whole separate table for the single trivial change. 
1632 */ 1633 if (GRAPHICS_VER(xe) >= 20 && 1634 instr == CMD_3DSTATE_DRAWING_RECTANGLE) 1635 instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; 1636 1637 bb->cs[bb->len] = instr; 1638 if (!is_single_dw) 1639 bb->cs[bb->len] |= (num_dw - 2); 1640 1641 bb->len += num_dw; 1642 } 1643 } 1644 1645 struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) 1646 { 1647 struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT); 1648 1649 if (!snapshot) 1650 return NULL; 1651 1652 if (lrc->bo->vm) 1653 xe_vm_get(lrc->bo->vm); 1654 1655 snapshot->context_desc = xe_lrc_ggtt_addr(lrc); 1656 snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); 1657 snapshot->head = xe_lrc_ring_head(lrc); 1658 snapshot->tail.internal = lrc->ring.tail; 1659 snapshot->tail.memory = xe_lrc_ring_tail(lrc); 1660 snapshot->start_seqno = xe_lrc_start_seqno(lrc); 1661 snapshot->seqno = xe_lrc_seqno(lrc); 1662 snapshot->lrc_bo = xe_bo_get(lrc->bo); 1663 snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); 1664 snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; 1665 snapshot->lrc_snapshot = NULL; 1666 snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); 1667 snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); 1668 return snapshot; 1669 } 1670 1671 void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) 1672 { 1673 struct xe_bo *bo; 1674 struct xe_vm *vm; 1675 struct iosys_map src; 1676 1677 if (!snapshot) 1678 return; 1679 1680 bo = snapshot->lrc_bo; 1681 vm = bo->vm; 1682 snapshot->lrc_bo = NULL; 1683 1684 snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); 1685 if (!snapshot->lrc_snapshot) 1686 goto put_bo; 1687 1688 xe_bo_lock(bo, false); 1689 if (!ttm_bo_vmap(&bo->ttm, &src)) { 1690 xe_map_memcpy_from(xe_bo_device(bo), 1691 snapshot->lrc_snapshot, &src, snapshot->lrc_offset, 1692 snapshot->lrc_size); 1693 ttm_bo_vunmap(&bo->ttm, &src); 1694 } else { 1695 kvfree(snapshot->lrc_snapshot); 1696 snapshot->lrc_snapshot = NULL; 
1697 } 1698 xe_bo_unlock(bo); 1699 put_bo: 1700 xe_bo_put(bo); 1701 if (vm) 1702 xe_vm_put(vm); 1703 } 1704 1705 void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) 1706 { 1707 unsigned long i; 1708 1709 if (!snapshot) 1710 return; 1711 1712 drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); 1713 drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", 1714 snapshot->indirect_context_desc); 1715 drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); 1716 drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", 1717 snapshot->tail.internal, snapshot->tail.memory); 1718 drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); 1719 drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); 1720 drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); 1721 drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp); 1722 1723 if (!snapshot->lrc_snapshot) 1724 return; 1725 1726 drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE); 1727 drm_puts(p, "\t[HWSP].data: "); 1728 for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) { 1729 u32 *val = snapshot->lrc_snapshot + i; 1730 char dumped[ASCII85_BUFSZ]; 1731 1732 drm_puts(p, ascii85_encode(*val, dumped)); 1733 } 1734 1735 drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE); 1736 drm_puts(p, "\t[HWCTX].data: "); 1737 for (; i < snapshot->lrc_size; i += sizeof(u32)) { 1738 u32 *val = snapshot->lrc_snapshot + i; 1739 char dumped[ASCII85_BUFSZ]; 1740 1741 drm_puts(p, ascii85_encode(*val, dumped)); 1742 } 1743 drm_puts(p, "\n"); 1744 } 1745 1746 void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) 1747 { 1748 if (!snapshot) 1749 return; 1750 1751 kvfree(snapshot->lrc_snapshot); 1752 if (snapshot->lrc_bo) { 1753 struct xe_vm *vm; 1754 1755 vm = snapshot->lrc_bo->vm; 1756 xe_bo_put(snapshot->lrc_bo); 1757 if (vm) 1758 xe_vm_put(vm); 1759 } 1760 kfree(snapshot); 1761 } 1762 1763 /** 1764 * 
xe_lrc_update_timestamp() - Update ctx timestamp 1765 * @lrc: Pointer to the lrc. 1766 * @old_ts: Old timestamp value 1767 * 1768 * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and 1769 * update saved value. 1770 * 1771 * Returns: New ctx timestamp value 1772 */ 1773 u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) 1774 { 1775 *old_ts = lrc->ctx_timestamp; 1776 1777 lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); 1778 1779 return lrc->ctx_timestamp; 1780 } 1781