1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_lrc.h" 7 8 #include <generated/xe_wa_oob.h> 9 10 #include <linux/ascii85.h> 11 12 #include "instructions/xe_mi_commands.h" 13 #include "instructions/xe_gfxpipe_commands.h" 14 #include "instructions/xe_gfx_state_commands.h" 15 #include "regs/xe_engine_regs.h" 16 #include "regs/xe_lrc_layout.h" 17 #include "xe_bb.h" 18 #include "xe_bo.h" 19 #include "xe_device.h" 20 #include "xe_drm_client.h" 21 #include "xe_exec_queue_types.h" 22 #include "xe_gt.h" 23 #include "xe_gt_printk.h" 24 #include "xe_hw_fence.h" 25 #include "xe_map.h" 26 #include "xe_memirq.h" 27 #include "xe_mmio.h" 28 #include "xe_sriov.h" 29 #include "xe_trace_lrc.h" 30 #include "xe_vm.h" 31 #include "xe_wa.h" 32 33 #define LRC_VALID BIT_ULL(0) 34 #define LRC_PRIVILEGE BIT_ULL(8) 35 #define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3) 36 #define LRC_LEGACY_64B_CONTEXT 3 37 38 #define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) 39 #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) 40 41 #define LRC_PPHWSP_SIZE SZ_4K 42 #define LRC_INDIRECT_CTX_BO_SIZE SZ_4K 43 #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K 44 45 /* 46 * Layout of the LRC and associated data allocated as 47 * lrc->bo: 48 * 49 * Region Size 50 * +============================+=================================+ <- __xe_lrc_ring_offset() 51 * | Ring | ring_size, see | 52 * | | xe_lrc_init() | 53 * +============================+=================================+ <- __xe_lrc_pphwsp_offset() 54 * | PPHWSP (includes SW state) | 4K | 55 * +----------------------------+---------------------------------+ <- __xe_lrc_regs_offset() 56 * | Engine Context Image | n * 4K, see | 57 * | | xe_gt_lrc_size() | 58 * +----------------------------+---------------------------------+ <- __xe_lrc_indirect_ring_offset() 59 * | Indirect Ring State Page | 0 or 4k, see | 60 * | | XE_LRC_FLAG_INDIRECT_RING_STATE | 61 * +============================+=================================+ <- __xe_lrc_indirect_ctx_offset() 62 * | Indirect Context Page | 0 or 4k, see | 63 * | | XE_LRC_FLAG_INDIRECT_CTX | 64 * +============================+=================================+ <- __xe_lrc_wa_bb_offset() 65 * | WA BB Per Ctx | 4k | 66 * +============================+=================================+ <- xe_bo_size(lrc->bo) 67 */ 68 69 static struct xe_device * 70 lrc_to_xe(struct xe_lrc *lrc) 71 { 72 return gt_to_xe(lrc->fence_ctx.gt); 73 } 74 75 static bool 76 gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) 77 { 78 if (XE_GT_WA(gt, 16010904313) && 79 (class == XE_ENGINE_CLASS_RENDER || 80 class == XE_ENGINE_CLASS_COMPUTE)) 81 return true; 82 83 return false; 84 } 85 86 size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) 87 { 88 struct xe_device *xe = gt_to_xe(gt); 89 size_t size; 90 91 /* Per-process HW status page (PPHWSP) */ 92 size = LRC_PPHWSP_SIZE; 93 94 /* Engine context image */ 95 switch (class) { 96 case XE_ENGINE_CLASS_RENDER: 97 if (GRAPHICS_VER(xe) >= 20) 98 size += 3 * SZ_4K; 99 else 100 size += 13 * SZ_4K; 101 break; 102 case XE_ENGINE_CLASS_COMPUTE: 103 if (GRAPHICS_VER(xe) >= 20) 104 size += 2 * SZ_4K; 105 else 106 size += 13 * SZ_4K; 107 break; 108 default: 109 WARN(1, "Unknown engine class: %d", class); 110 fallthrough; 111 case XE_ENGINE_CLASS_COPY: 112 case XE_ENGINE_CLASS_VIDEO_DECODE: 113 case XE_ENGINE_CLASS_VIDEO_ENHANCE: 114 case XE_ENGINE_CLASS_OTHER: 115 size += 1 * SZ_4K; 116 } 117 118 /* Add indirect ring state page */ 119 if 
(xe_gt_has_indirect_ring_state(gt)) 120 size += LRC_INDIRECT_RING_STATE_SIZE; 121 122 return size; 123 } 124 125 /* 126 * The per-platform tables are u8-encoded in @data. Decode @data and set the 127 * addresses' offset and commands in @regs. The following encoding is used 128 * for each byte. There are 2 steps: decoding commands and decoding addresses. 129 * 130 * Commands: 131 * [7]: create NOPs - number of NOPs are set in lower bits 132 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set 133 * MI_LRI_FORCE_POSTED 134 * [5:0]: Number of NOPs or registers to set values to in case of 135 * MI_LOAD_REGISTER_IMM 136 * 137 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" 138 * number of registers. They are set by using the REG/REG16 macros: the former 139 * is used for offsets smaller than 0x200 while the latter is for values bigger 140 * than that. Those macros already set all the bits documented below correctly: 141 * 142 * [7]: When a register offset needs more than 6 bits, use additional bytes, to 143 * follow, for the lower bits 144 * [6:0]: Register offset, without considering the engine base. 145 * 146 * This function only tweaks the commands and register offsets. Values are not 147 * filled out. 148 */ 149 static void set_offsets(u32 *regs, 150 const u8 *data, 151 const struct xe_hw_engine *hwe) 152 #define NOP(x) (BIT(7) | (x)) 153 #define LRI(count, flags) ((flags) << 6 | (count) | \ 154 BUILD_BUG_ON_ZERO(count >= BIT(6))) 155 #define POSTED BIT(0) 156 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) 157 #define REG16(x) \ 158 (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ 159 (((x) >> 2) & 0x7f) 160 { 161 const u32 base = hwe->mmio_base; 162 163 while (*data) { 164 u8 count, flags; 165 166 if (*data & BIT(7)) { /* skip */ 167 count = *data++ & ~BIT(7); 168 regs += count; 169 continue; 170 } 171 172 count = *data & 0x3f; 173 flags = *data >> 6; 174 data++; 175 176 *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); 177 if (flags & POSTED) 178 *regs |= MI_LRI_FORCE_POSTED; 179 *regs |= MI_LRI_LRM_CS_MMIO; 180 regs++; 181 182 xe_gt_assert(hwe->gt, count); 183 do { 184 u32 offset = 0; 185 u8 v; 186 187 do { 188 v = *data++; 189 offset <<= 7; 190 offset |= v & ~BIT(7); 191 } while (v & BIT(7)); 192 193 regs[0] = base + (offset << 2); 194 regs += 2; 195 } while (--count); 196 } 197 198 *regs = MI_BATCH_BUFFER_END | BIT(0); 199 } 200 201 static const u8 gen12_xcs_offsets[] = { 202 NOP(1), 203 LRI(13, POSTED), 204 REG16(0x244), 205 REG(0x034), 206 REG(0x030), 207 REG(0x038), 208 REG(0x03c), 209 REG(0x168), 210 REG(0x140), 211 REG(0x110), 212 REG(0x1c0), 213 REG(0x1c4), 214 REG(0x1c8), 215 REG(0x180), 216 REG16(0x2b4), 217 218 NOP(5), 219 LRI(9, POSTED), 220 REG16(0x3a8), 221 REG16(0x28c), 222 REG16(0x288), 223 REG16(0x284), 224 REG16(0x280), 225 REG16(0x27c), 226 REG16(0x278), 227 REG16(0x274), 228 REG16(0x270), 229 230 0 231 }; 232 233 static const u8 dg2_xcs_offsets[] = { 234 NOP(1), 235 LRI(15, POSTED), 236 REG16(0x244), 237 REG(0x034), 238 REG(0x030), 239 REG(0x038), 240 REG(0x03c), 241 REG(0x168), 242 REG(0x140), 243 REG(0x110), 244 REG(0x1c0), 245 REG(0x1c4), 246 REG(0x1c8), 247 REG(0x180), 248 REG16(0x2b4), 249 REG(0x120), 250 REG(0x124), 251 252 NOP(1), 253 LRI(9, POSTED), 254 REG16(0x3a8), 255 REG16(0x28c), 256 REG16(0x288), 257 REG16(0x284), 258 REG16(0x280), 259 REG16(0x27c), 260 REG16(0x278), 261 REG16(0x274), 262 REG16(0x270), 263 264 0 265 }; 266 267 static const u8 gen12_rcs_offsets[] = { 268 NOP(1), 269 
LRI(13, POSTED), 270 REG16(0x244), 271 REG(0x034), 272 REG(0x030), 273 REG(0x038), 274 REG(0x03c), 275 REG(0x168), 276 REG(0x140), 277 REG(0x110), 278 REG(0x1c0), 279 REG(0x1c4), 280 REG(0x1c8), 281 REG(0x180), 282 REG16(0x2b4), 283 284 NOP(5), 285 LRI(9, POSTED), 286 REG16(0x3a8), 287 REG16(0x28c), 288 REG16(0x288), 289 REG16(0x284), 290 REG16(0x280), 291 REG16(0x27c), 292 REG16(0x278), 293 REG16(0x274), 294 REG16(0x270), 295 296 LRI(3, POSTED), 297 REG(0x1b0), 298 REG16(0x5a8), 299 REG16(0x5ac), 300 301 NOP(6), 302 LRI(1, 0), 303 REG(0x0c8), 304 NOP(3 + 9 + 1), 305 306 LRI(51, POSTED), 307 REG16(0x588), 308 REG16(0x588), 309 REG16(0x588), 310 REG16(0x588), 311 REG16(0x588), 312 REG16(0x588), 313 REG(0x028), 314 REG(0x09c), 315 REG(0x0c0), 316 REG(0x178), 317 REG(0x17c), 318 REG16(0x358), 319 REG(0x170), 320 REG(0x150), 321 REG(0x154), 322 REG(0x158), 323 REG16(0x41c), 324 REG16(0x600), 325 REG16(0x604), 326 REG16(0x608), 327 REG16(0x60c), 328 REG16(0x610), 329 REG16(0x614), 330 REG16(0x618), 331 REG16(0x61c), 332 REG16(0x620), 333 REG16(0x624), 334 REG16(0x628), 335 REG16(0x62c), 336 REG16(0x630), 337 REG16(0x634), 338 REG16(0x638), 339 REG16(0x63c), 340 REG16(0x640), 341 REG16(0x644), 342 REG16(0x648), 343 REG16(0x64c), 344 REG16(0x650), 345 REG16(0x654), 346 REG16(0x658), 347 REG16(0x65c), 348 REG16(0x660), 349 REG16(0x664), 350 REG16(0x668), 351 REG16(0x66c), 352 REG16(0x670), 353 REG16(0x674), 354 REG16(0x678), 355 REG16(0x67c), 356 REG(0x068), 357 REG(0x084), 358 NOP(1), 359 360 0 361 }; 362 363 static const u8 xehp_rcs_offsets[] = { 364 NOP(1), 365 LRI(13, POSTED), 366 REG16(0x244), 367 REG(0x034), 368 REG(0x030), 369 REG(0x038), 370 REG(0x03c), 371 REG(0x168), 372 REG(0x140), 373 REG(0x110), 374 REG(0x1c0), 375 REG(0x1c4), 376 REG(0x1c8), 377 REG(0x180), 378 REG16(0x2b4), 379 380 NOP(5), 381 LRI(9, POSTED), 382 REG16(0x3a8), 383 REG16(0x28c), 384 REG16(0x288), 385 REG16(0x284), 386 REG16(0x280), 387 REG16(0x27c), 388 REG16(0x278), 389 REG16(0x274), 390 REG16(0x270), 391 392 LRI(3, POSTED), 393 REG(0x1b0), 394 REG16(0x5a8), 395 REG16(0x5ac), 396 397 NOP(6), 398 LRI(1, 0), 399 REG(0x0c8), 400 401 0 402 }; 403 404 static const u8 dg2_rcs_offsets[] = { 405 NOP(1), 406 LRI(15, POSTED), 407 REG16(0x244), 408 REG(0x034), 409 REG(0x030), 410 REG(0x038), 411 REG(0x03c), 412 REG(0x168), 413 REG(0x140), 414 REG(0x110), 415 REG(0x1c0), 416 REG(0x1c4), 417 REG(0x1c8), 418 REG(0x180), 419 REG16(0x2b4), 420 REG(0x120), 421 REG(0x124), 422 423 NOP(1), 424 LRI(9, POSTED), 425 REG16(0x3a8), 426 REG16(0x28c), 427 REG16(0x288), 428 REG16(0x284), 429 REG16(0x280), 430 REG16(0x27c), 431 REG16(0x278), 432 REG16(0x274), 433 REG16(0x270), 434 435 LRI(3, POSTED), 436 REG(0x1b0), 437 REG16(0x5a8), 438 REG16(0x5ac), 439 440 NOP(6), 441 LRI(1, 0), 442 REG(0x0c8), 443 444 0 445 }; 446 447 static const u8 mtl_rcs_offsets[] = { 448 NOP(1), 449 LRI(15, POSTED), 450 REG16(0x244), 451 REG(0x034), 452 REG(0x030), 453 REG(0x038), 454 REG(0x03c), 455 REG(0x168), 456 REG(0x140), 457 REG(0x110), 458 REG(0x1c0), 459 REG(0x1c4), 460 REG(0x1c8), 461 REG(0x180), 462 REG16(0x2b4), 463 REG(0x120), 464 REG(0x124), 465 466 NOP(1), 467 LRI(9, POSTED), 468 REG16(0x3a8), 469 REG16(0x28c), 470 REG16(0x288), 471 REG16(0x284), 472 REG16(0x280), 473 REG16(0x27c), 474 REG16(0x278), 475 REG16(0x274), 476 REG16(0x270), 477 478 NOP(2), 479 LRI(2, POSTED), 480 REG16(0x5a8), 481 REG16(0x5ac), 482 483 NOP(6), 484 LRI(1, 0), 485 REG(0x0c8), 486 487 0 488 }; 489 490 #define XE2_CTX_COMMON \ 491 NOP(1), /* [0x00] */ \ 492 LRI(15, POSTED), /* 
[0x01] */ \ 493 REG16(0x244), /* [0x02] CTXT_SR_CTL */ \ 494 REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \ 495 REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \ 496 REG(0x038), /* [0x08] RING_BUFFER_START */ \ 497 REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \ 498 REG(0x168), /* [0x0c] BB_ADDR_UDW */ \ 499 REG(0x140), /* [0x0e] BB_ADDR */ \ 500 REG(0x110), /* [0x10] BB_STATE */ \ 501 REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \ 502 REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \ 503 REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \ 504 REG(0x180), /* [0x18] CCID */ \ 505 REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \ 506 REG(0x120), /* [0x1c] PRT_BB_STATE */ \ 507 REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \ 508 \ 509 NOP(1), /* [0x20] */ \ 510 LRI(9, POSTED), /* [0x21] */ \ 511 REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \ 512 REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \ 513 REG(0x108), /* [0x26] INDIRECT_RING_STATE */ \ 514 REG16(0x284), /* [0x28] dummy reg */ \ 515 REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \ 516 REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \ 517 REG16(0x278), /* [0x2e] CS_CTX_ASID */ \ 518 REG16(0x274), /* [0x30] PTBP_UDW */ \ 519 REG16(0x270) /* [0x32] PTBP_LDW */ 520 521 static const u8 xe2_rcs_offsets[] = { 522 XE2_CTX_COMMON, 523 524 NOP(2), /* [0x34] */ 525 LRI(2, POSTED), /* [0x36] */ 526 REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */ 527 REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */ 528 529 NOP(6), /* [0x41] */ 530 LRI(1, 0), /* [0x47] */ 531 REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ 532 533 0 534 }; 535 536 static const u8 xe2_bcs_offsets[] = { 537 XE2_CTX_COMMON, 538 539 NOP(4 + 8 + 1), /* [0x34] */ 540 LRI(2, POSTED), /* [0x41] */ 541 REG16(0x200), /* [0x42] BCS_SWCTRL */ 542 REG16(0x204), /* [0x44] BLIT_CCTL */ 543 544 0 545 }; 546 547 static const u8 xe2_xcs_offsets[] = { 548 XE2_CTX_COMMON, 549 550 0 551 }; 552 553 static const u8 xe2_indirect_ring_state_offsets[] = { 554 NOP(1), /* [0x00] */ 555 LRI(5, POSTED), /* [0x01] */ 556 REG(0x034), /* [0x02] RING_BUFFER_HEAD */ 557 REG(0x030), /* [0x04] RING_BUFFER_TAIL */ 558 REG(0x038), /* [0x06] RING_BUFFER_START */ 559 REG(0x048), /* [0x08] RING_BUFFER_START_UDW */ 560 REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ 561 562 NOP(5), /* [0x0c] */ 563 LRI(9, POSTED), /* [0x11] */ 564 REG(0x168), /* [0x12] BB_ADDR_UDW */ 565 REG(0x140), /* [0x14] BB_ADDR */ 566 REG(0x110), /* [0x16] BB_STATE */ 567 REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */ 568 REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */ 569 REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */ 570 REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */ 571 REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */ 572 REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */ 573 574 NOP(12), /* [0x00] */ 575 576 0 577 }; 578 579 #undef REG16 580 #undef REG 581 #undef LRI 582 #undef NOP 583 584 static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) 585 { 586 if (class == XE_ENGINE_CLASS_RENDER) { 587 if (GRAPHICS_VER(xe) >= 20) 588 return xe2_rcs_offsets; 589 else if (GRAPHICS_VERx100(xe) >= 1270) 590 return mtl_rcs_offsets; 591 else if (GRAPHICS_VERx100(xe) >= 1255) 592 return dg2_rcs_offsets; 593 else if (GRAPHICS_VERx100(xe) >= 1250) 594 return xehp_rcs_offsets; 595 else 596 return gen12_rcs_offsets; 597 } else if (class == XE_ENGINE_CLASS_COPY) { 598 if (GRAPHICS_VER(xe) >= 20) 599 return xe2_bcs_offsets; 600 else 601 return gen12_xcs_offsets; 602 } else { 603 if (GRAPHICS_VER(xe) >= 20) 604 return xe2_xcs_offsets; 605 else if (GRAPHICS_VERx100(xe) >= 1255) 606 return 
dg2_xcs_offsets; 607 else 608 return gen12_xcs_offsets; 609 } 610 } 611 612 static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) 613 { 614 regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | 615 CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); 616 617 if (xe_gt_has_indirect_ring_state(hwe->gt)) 618 regs[CTX_CONTEXT_CONTROL] |= 619 _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); 620 } 621 622 static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) 623 { 624 struct xe_memirq *memirq = >_to_tile(hwe->gt)->memirq; 625 struct xe_device *xe = gt_to_xe(hwe->gt); 626 u8 num_regs; 627 628 if (!xe_device_uses_memirq(xe)) 629 return; 630 631 regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM | 632 MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT; 633 regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; 634 regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); 635 636 num_regs = xe_device_has_msix(xe) ? 3 : 2; 637 regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) | 638 MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; 639 regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; 640 regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe); 641 regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; 642 regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe); 643 644 if (xe_device_has_msix(xe)) { 645 regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr; 646 /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */ 647 } 648 } 649 650 static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) 651 { 652 struct xe_device *xe = gt_to_xe(hwe->gt); 653 654 if (GRAPHICS_VERx100(xe) >= 1250) 655 return 0x70; 656 else 657 return 0x60; 658 } 659 660 static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) 661 { 662 int x; 663 664 x = lrc_ring_mi_mode(hwe); 665 regs[x + 1] &= ~STOP_RING; 666 regs[x + 1] |= STOP_RING << 16; 667 } 668 669 static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc) 670 { 671 return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE; 672 } 673 674 static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) 675 { 676 return 0; 677 } 678 679 u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) 680 { 681 return lrc->ring.size; 682 } 683 684 /* Make the magic macros work */ 685 #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset 686 #define __xe_lrc_regs_offset xe_lrc_regs_offset 687 688 #define LRC_SEQNO_PPHWSP_OFFSET 512 689 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) 690 #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) 691 #define LRC_ENGINE_ID_PPHWSP_OFFSET 1024 692 #define LRC_PARALLEL_PPHWSP_OFFSET 2048 693 694 u32 xe_lrc_regs_offset(struct xe_lrc *lrc) 695 { 696 return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; 697 } 698 699 /** 700 * xe_lrc_reg_size() - Get size of the LRC registers area within queues 701 * @xe: the &xe_device struct instance 702 * 703 * Returns: Size of the LRC registers area for current platform 704 */ 705 size_t xe_lrc_reg_size(struct xe_device *xe) 706 { 707 if (GRAPHICS_VERx100(xe) >= 1250) 708 return 96 * sizeof(u32); 709 else 710 return 80 * sizeof(u32); 711 } 712 713 size_t xe_lrc_skip_size(struct xe_device *xe) 714 { 715 return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe); 716 } 717 718 static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) 719 { 720 /* The seqno is stored in the driver-defined portion of PPHWSP */ 721 return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET; 722 } 723 724 static inline u32 
__xe_lrc_start_seqno_offset(struct xe_lrc *lrc) 725 { 726 /* The start seqno is stored in the driver-defined portion of PPHWSP */ 727 return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; 728 } 729 730 static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) 731 { 732 /* This is stored in the driver-defined portion of PPHWSP */ 733 return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; 734 } 735 736 static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) 737 { 738 /* The parallel is stored in the driver-defined portion of PPHWSP */ 739 return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; 740 } 741 742 static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc) 743 { 744 return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET; 745 } 746 747 static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) 748 { 749 return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); 750 } 751 752 static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) 753 { 754 return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32); 755 } 756 757 static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) 758 { 759 u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - 760 LRC_INDIRECT_RING_STATE_SIZE; 761 762 if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX) 763 offset -= LRC_INDIRECT_CTX_BO_SIZE; 764 765 return offset; 766 } 767 768 static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc) 769 { 770 return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE; 771 } 772 773 static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) 774 { 775 return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; 776 } 777 778 #define DECL_MAP_ADDR_HELPERS(elem) \ 779 static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ 780 { \ 781 struct iosys_map map = lrc->bo->vmap; \ 782 \ 783 xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \ 784 iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ 785 return map; \ 786 } \ 787 static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ 788 { \ 789 return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ 790 } \ 791 792 DECL_MAP_ADDR_HELPERS(ring) 793 DECL_MAP_ADDR_HELPERS(pphwsp) 794 DECL_MAP_ADDR_HELPERS(seqno) 795 DECL_MAP_ADDR_HELPERS(regs) 796 DECL_MAP_ADDR_HELPERS(start_seqno) 797 DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) 798 DECL_MAP_ADDR_HELPERS(ctx_timestamp) 799 DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw) 800 DECL_MAP_ADDR_HELPERS(parallel) 801 DECL_MAP_ADDR_HELPERS(indirect_ring) 802 DECL_MAP_ADDR_HELPERS(engine_id) 803 804 #undef DECL_MAP_ADDR_HELPERS 805 806 /** 807 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address 808 * @lrc: Pointer to the lrc. 809 * 810 * Returns: ctx timestamp GGTT address 811 */ 812 u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) 813 { 814 return __xe_lrc_ctx_timestamp_ggtt_addr(lrc); 815 } 816 817 /** 818 * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address 819 * @lrc: Pointer to the lrc. 820 * 821 * Returns: ctx timestamp udw GGTT address 822 */ 823 u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc) 824 { 825 return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); 826 } 827 828 /** 829 * xe_lrc_ctx_timestamp() - Read ctx timestamp value 830 * @lrc: Pointer to the lrc. 
831 * 832 * Returns: ctx timestamp value 833 */ 834 u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) 835 { 836 struct xe_device *xe = lrc_to_xe(lrc); 837 struct iosys_map map; 838 u32 ldw, udw = 0; 839 840 map = __xe_lrc_ctx_timestamp_map(lrc); 841 ldw = xe_map_read32(xe, &map); 842 843 if (xe->info.has_64bit_timestamp) { 844 map = __xe_lrc_ctx_timestamp_udw_map(lrc); 845 udw = xe_map_read32(xe, &map); 846 } 847 848 return (u64)udw << 32 | ldw; 849 } 850 851 /** 852 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address 853 * @lrc: Pointer to the lrc. 854 * 855 * Returns: ctx timestamp job GGTT address 856 */ 857 u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc) 858 { 859 return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); 860 } 861 862 /** 863 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value 864 * @lrc: Pointer to the lrc. 865 * 866 * Returns: ctx timestamp job value 867 */ 868 u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc) 869 { 870 struct xe_device *xe = lrc_to_xe(lrc); 871 struct iosys_map map; 872 873 map = __xe_lrc_ctx_job_timestamp_map(lrc); 874 return xe_map_read32(xe, &map); 875 } 876 877 u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) 878 { 879 return __xe_lrc_pphwsp_ggtt_addr(lrc); 880 } 881 882 u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc) 883 { 884 if (!xe_lrc_has_indirect_ring_state(lrc)) 885 return 0; 886 887 return __xe_lrc_indirect_ring_ggtt_addr(lrc); 888 } 889 890 static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr) 891 { 892 struct xe_device *xe = lrc_to_xe(lrc); 893 struct iosys_map map; 894 895 map = __xe_lrc_indirect_ring_map(lrc); 896 iosys_map_incr(&map, reg_nr * sizeof(u32)); 897 return xe_map_read32(xe, &map); 898 } 899 900 static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc, 901 int reg_nr, u32 val) 902 { 903 struct xe_device *xe = lrc_to_xe(lrc); 904 struct iosys_map map; 905 906 map = __xe_lrc_indirect_ring_map(lrc); 907 iosys_map_incr(&map, reg_nr * sizeof(u32)); 908 xe_map_write32(xe, &map, val); 909 } 910 911 u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) 912 { 913 struct xe_device *xe = lrc_to_xe(lrc); 914 struct iosys_map map; 915 916 map = __xe_lrc_regs_map(lrc); 917 iosys_map_incr(&map, reg_nr * sizeof(u32)); 918 return xe_map_read32(xe, &map); 919 } 920 921 void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) 922 { 923 struct xe_device *xe = lrc_to_xe(lrc); 924 struct iosys_map map; 925 926 map = __xe_lrc_regs_map(lrc); 927 iosys_map_incr(&map, reg_nr * sizeof(u32)); 928 xe_map_write32(xe, &map, val); 929 } 930 931 static void *empty_lrc_data(struct xe_hw_engine *hwe) 932 { 933 struct xe_gt *gt = hwe->gt; 934 void *data; 935 u32 *regs; 936 937 data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL); 938 if (!data) 939 return NULL; 940 941 /* 1st page: Per-Process of HW status Page */ 942 regs = data + LRC_PPHWSP_SIZE; 943 set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe); 944 set_context_control(regs, hwe); 945 set_memory_based_intr(regs, hwe); 946 reset_stop_ring(regs, hwe); 947 if (xe_gt_has_indirect_ring_state(gt)) { 948 regs = data + xe_gt_lrc_size(gt, hwe->class) - 949 LRC_INDIRECT_RING_STATE_SIZE; 950 set_offsets(regs, xe2_indirect_ring_state_offsets, hwe); 951 } 952 953 return data; 954 } 955 956 /** 957 * xe_default_lrc_update_memirq_regs_with_address - Re-compute GGTT references in default LRC 958 * of given engine. 
959 * @hwe: the &xe_hw_engine struct instance 960 */ 961 void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe) 962 { 963 struct xe_gt *gt = hwe->gt; 964 u32 *regs; 965 966 if (!gt->default_lrc[hwe->class]) 967 return; 968 969 regs = gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE; 970 set_memory_based_intr(regs, hwe); 971 } 972 973 /** 974 * xe_lrc_update_memirq_regs_with_address - Re-compute GGTT references in mem interrupt data 975 * for given LRC. 976 * @lrc: the &xe_lrc struct instance 977 * @hwe: the &xe_hw_engine struct instance 978 * @regs: scratch buffer to be used as temporary storage 979 */ 980 void xe_lrc_update_memirq_regs_with_address(struct xe_lrc *lrc, struct xe_hw_engine *hwe, 981 u32 *regs) 982 { 983 struct xe_gt *gt = hwe->gt; 984 struct iosys_map map; 985 size_t regs_len; 986 987 if (!xe_device_uses_memirq(gt_to_xe(gt))) 988 return; 989 990 map = __xe_lrc_regs_map(lrc); 991 regs_len = xe_lrc_reg_size(gt_to_xe(gt)); 992 xe_map_memcpy_from(gt_to_xe(gt), regs, &map, 0, regs_len); 993 set_memory_based_intr(regs, hwe); 994 xe_map_memcpy_to(gt_to_xe(gt), &map, 0, regs, regs_len); 995 } 996 997 static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) 998 { 999 u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); 1000 1001 xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); 1002 xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); 1003 } 1004 1005 static void xe_lrc_finish(struct xe_lrc *lrc) 1006 { 1007 xe_hw_fence_ctx_finish(&lrc->fence_ctx); 1008 xe_bo_unpin_map_no_vm(lrc->bo); 1009 } 1010 1011 /* 1012 * wa_bb_setup_utilization() - Write commands to wa bb to assist 1013 * in calculating active context run ticks. 1014 * 1015 * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the 1016 * context, but only gets updated when the context switches out. In order to 1017 * check how long a context has been active before it switches out, two things 1018 * are required: 1019 * 1020 * (1) Determine if the context is running: 1021 * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in 1022 * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is 1023 * initialized. During a query, we just check for this value to determine if the 1024 * context is active. If the context switched out, it would overwrite this 1025 * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as 1026 * the last part of context restore, so reusing this LRC location will not 1027 * clobber anything. 1028 * 1029 * (2) Calculate the time that the context has been active for: 1030 * The CTX_TIMESTAMP ticks only when the context is active. If a context is 1031 * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization. 1032 * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific 1033 * engine instance. Since we do not know which instance the context is running 1034 * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and 1035 * store it in the PPHSWP. 
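 *
 * A query-side sketch of how the two pieces above fit together (illustrative
 * only, not the driver's exact query path; the helpers used to read back the
 * saved engine id are assumed):
 *
 *	ts = xe_lrc_ctx_timestamp(lrc);
 *	if (ts == CONTEXT_ACTIVE) {
 *		// Still running: the WA BB stored the engine id in the
 *		// PPHWSP, so sample that engine's RING_CTX_TIMESTAMP MMIO.
 *		ts = read_ring_ctx_timestamp(engine_from_pphwsp_id(lrc));
 *	}
 *	active_ticks = ts - lrc->ctx_timestamp;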
1036 */ 1037 #define CONTEXT_ACTIVE 1ULL 1038 static ssize_t setup_utilization_wa(struct xe_lrc *lrc, 1039 struct xe_hw_engine *hwe, 1040 u32 *batch, 1041 size_t max_len) 1042 { 1043 u32 *cmd = batch; 1044 1045 if (xe_gt_WARN_ON(lrc->gt, max_len < 12)) 1046 return -ENOSPC; 1047 1048 *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; 1049 *cmd++ = ENGINE_ID(0).addr; 1050 *cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc); 1051 *cmd++ = 0; 1052 1053 *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); 1054 *cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc); 1055 *cmd++ = 0; 1056 *cmd++ = lower_32_bits(CONTEXT_ACTIVE); 1057 1058 if (lrc_to_xe(lrc)->info.has_64bit_timestamp) { 1059 *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); 1060 *cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); 1061 *cmd++ = 0; 1062 *cmd++ = upper_32_bits(CONTEXT_ACTIVE); 1063 } 1064 1065 return cmd - batch; 1066 } 1067 1068 static ssize_t setup_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe, 1069 u32 *batch, size_t max_len) 1070 { 1071 const u32 ts_addr = __xe_lrc_ctx_timestamp_ggtt_addr(lrc); 1072 u32 *cmd = batch; 1073 1074 if (!XE_GT_WA(lrc->gt, 16010904313) || 1075 !(hwe->class == XE_ENGINE_CLASS_RENDER || 1076 hwe->class == XE_ENGINE_CLASS_COMPUTE || 1077 hwe->class == XE_ENGINE_CLASS_COPY || 1078 hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE || 1079 hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE)) 1080 return 0; 1081 1082 if (xe_gt_WARN_ON(lrc->gt, max_len < 12)) 1083 return -ENOSPC; 1084 1085 *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO | 1086 MI_LRM_ASYNC; 1087 *cmd++ = RING_CTX_TIMESTAMP(0).addr; 1088 *cmd++ = ts_addr; 1089 *cmd++ = 0; 1090 1091 *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO | 1092 MI_LRM_ASYNC; 1093 *cmd++ = RING_CTX_TIMESTAMP(0).addr; 1094 *cmd++ = ts_addr; 1095 *cmd++ = 0; 1096 1097 *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO; 1098 *cmd++ = RING_CTX_TIMESTAMP(0).addr; 1099 *cmd++ = ts_addr; 1100 *cmd++ = 0; 1101 1102 return cmd - batch; 1103 } 1104 1105 static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, 1106 struct xe_hw_engine *hwe, 1107 u32 *batch, size_t max_len) 1108 { 1109 u32 *cmd = batch; 1110 1111 if (!XE_GT_WA(lrc->gt, 18022495364) || 1112 hwe->class != XE_ENGINE_CLASS_RENDER) 1113 return 0; 1114 1115 if (xe_gt_WARN_ON(lrc->gt, max_len < 3)) 1116 return -ENOSPC; 1117 1118 *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); 1119 *cmd++ = CS_DEBUG_MODE1(0).addr; 1120 *cmd++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); 1121 1122 return cmd - batch; 1123 } 1124 1125 struct bo_setup { 1126 ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, 1127 u32 *batch, size_t max_size); 1128 }; 1129 1130 struct bo_setup_state { 1131 /* Input: */ 1132 struct xe_lrc *lrc; 1133 struct xe_hw_engine *hwe; 1134 size_t max_size; 1135 size_t reserve_dw; 1136 unsigned int offset; 1137 const struct bo_setup *funcs; 1138 unsigned int num_funcs; 1139 1140 /* State: */ 1141 u32 *buffer; 1142 u32 *ptr; 1143 unsigned int written; 1144 }; 1145 1146 static int setup_bo(struct bo_setup_state *state) 1147 { 1148 ssize_t remain; 1149 1150 if (state->lrc->bo->vmap.is_iomem) { 1151 if (!state->buffer) 1152 return -ENOMEM; 1153 state->ptr = state->buffer; 1154 } else { 1155 state->ptr = state->lrc->bo->vmap.vaddr + state->offset; 1156 } 1157 1158 remain = state->max_size / sizeof(u32); 1159 1160 for (size_t i = 0; i < state->num_funcs; i++) { 1161 ssize_t len = 
state->funcs[i].setup(state->lrc, state->hwe, 1162 state->ptr, remain); 1163 1164 remain -= len; 1165 1166 /* 1167 * Caller has asked for at least reserve_dw to remain unused. 1168 */ 1169 if (len < 0 || 1170 xe_gt_WARN_ON(state->lrc->gt, remain < state->reserve_dw)) 1171 goto fail; 1172 1173 state->ptr += len; 1174 state->written += len; 1175 } 1176 1177 return 0; 1178 1179 fail: 1180 return -ENOSPC; 1181 } 1182 1183 static void finish_bo(struct bo_setup_state *state) 1184 { 1185 if (!state->buffer) 1186 return; 1187 1188 xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, 1189 state->offset, state->buffer, 1190 state->written * sizeof(u32)); 1191 } 1192 1193 /** 1194 * xe_lrc_setup_wa_bb_with_scratch - Execute all wa bb setup callbacks. 1195 * @lrc: the &xe_lrc struct instance 1196 * @hwe: the &xe_hw_engine struct instance 1197 * @scratch: preallocated scratch buffer for temporary storage 1198 * Return: 0 on success, negative error code on failure 1199 */ 1200 int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *scratch) 1201 { 1202 static const struct bo_setup funcs[] = { 1203 { .setup = setup_timestamp_wa }, 1204 { .setup = setup_invalidate_state_cache_wa }, 1205 { .setup = setup_utilization_wa }, 1206 }; 1207 struct bo_setup_state state = { 1208 .lrc = lrc, 1209 .hwe = hwe, 1210 .max_size = LRC_WA_BB_SIZE, 1211 .buffer = scratch, 1212 .reserve_dw = 1, 1213 .offset = __xe_lrc_wa_bb_offset(lrc), 1214 .funcs = funcs, 1215 .num_funcs = ARRAY_SIZE(funcs), 1216 }; 1217 int ret; 1218 1219 ret = setup_bo(&state); 1220 if (ret) 1221 return ret; 1222 1223 *state.ptr++ = MI_BATCH_BUFFER_END; 1224 state.written++; 1225 1226 finish_bo(&state); 1227 1228 xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, 1229 xe_bo_ggtt_addr(lrc->bo) + state.offset + 1); 1230 1231 return 0; 1232 } 1233 1234 static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) 1235 { 1236 u32 *buf = NULL; 1237 int ret; 1238 1239 if (lrc->bo->vmap.is_iomem) 1240 buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL); 1241 1242 ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf); 1243 1244 kfree(buf); 1245 1246 return ret; 1247 } 1248 1249 static int 1250 setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) 1251 { 1252 static struct bo_setup rcs_funcs[] = { 1253 { .setup = setup_timestamp_wa }, 1254 }; 1255 struct bo_setup_state state = { 1256 .lrc = lrc, 1257 .hwe = hwe, 1258 .max_size = (63 * 64) /* max 63 cachelines */, 1259 .buffer = NULL, 1260 .offset = __xe_lrc_indirect_ctx_offset(lrc), 1261 }; 1262 int ret; 1263 1264 if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX)) 1265 return 0; 1266 1267 if (hwe->class == XE_ENGINE_CLASS_RENDER || 1268 hwe->class == XE_ENGINE_CLASS_COMPUTE) { 1269 state.funcs = rcs_funcs; 1270 state.num_funcs = ARRAY_SIZE(rcs_funcs); 1271 } 1272 1273 if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) 1274 return 0; 1275 1276 if (lrc->bo->vmap.is_iomem) 1277 state.buffer = kmalloc(state.max_size, GFP_KERNEL); 1278 1279 ret = setup_bo(&state); 1280 if (ret) { 1281 kfree(state.buffer); 1282 return ret; 1283 } 1284 1285 /* 1286 * Align to 64B cacheline so there's no garbage at the end for CS to 1287 * execute: size for indirect ctx must be a multiple of 64. 1288 */ 1289 while (state.written & 0xf) { 1290 *state.ptr++ = MI_NOOP; 1291 state.written++; 1292 } 1293 1294 finish_bo(&state); 1295 kfree(state.buffer); 1296 1297 xe_lrc_write_ctx_reg(lrc, 1298 CTX_CS_INDIRECT_CTX, 1299 (xe_bo_ggtt_addr(lrc->bo) + state.offset) | 1300 /* Size in CLs. 
*/ 1301 (state.written * sizeof(u32) / 64)); 1302 xe_lrc_write_ctx_reg(lrc, 1303 CTX_CS_INDIRECT_CTX_OFFSET, 1304 CTX_INDIRECT_CTX_OFFSET_DEFAULT); 1305 1306 return 0; 1307 } 1308 1309 static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, 1310 struct xe_vm *vm, u32 ring_size, u16 msix_vec, 1311 u32 init_flags) 1312 { 1313 struct xe_gt *gt = hwe->gt; 1314 const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); 1315 u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; 1316 struct xe_tile *tile = gt_to_tile(gt); 1317 struct xe_device *xe = gt_to_xe(gt); 1318 struct iosys_map map; 1319 u32 arb_enable; 1320 u32 bo_flags; 1321 int err; 1322 1323 kref_init(&lrc->refcount); 1324 lrc->gt = gt; 1325 lrc->size = lrc_size; 1326 lrc->flags = 0; 1327 lrc->ring.size = ring_size; 1328 lrc->ring.tail = 0; 1329 1330 if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { 1331 lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; 1332 bo_size += LRC_INDIRECT_CTX_BO_SIZE; 1333 } 1334 1335 if (xe_gt_has_indirect_ring_state(gt)) 1336 lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; 1337 1338 bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | 1339 XE_BO_FLAG_GGTT_INVALIDATE; 1340 if (vm && vm->xef) /* userspace */ 1341 bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; 1342 1343 lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, 1344 ttm_bo_type_kernel, 1345 bo_flags); 1346 if (IS_ERR(lrc->bo)) 1347 return PTR_ERR(lrc->bo); 1348 1349 xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, 1350 hwe->fence_irq, hwe->name); 1351 1352 /* 1353 * Init Per-Process of HW status Page, LRC / context state to known 1354 * values. If there's already a primed default_lrc, just copy it, otherwise 1355 * it's the early submission to record the lrc: build a new empty one from 1356 * scratch. 1357 */ 1358 map = __xe_lrc_pphwsp_map(lrc); 1359 if (gt->default_lrc[hwe->class]) { 1360 xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ 1361 xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, 1362 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, 1363 lrc_size - LRC_PPHWSP_SIZE); 1364 } else { 1365 void *init_data = empty_lrc_data(hwe); 1366 1367 if (!init_data) { 1368 err = -ENOMEM; 1369 goto err_lrc_finish; 1370 } 1371 1372 xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); 1373 kfree(init_data); 1374 } 1375 1376 if (vm) { 1377 xe_lrc_set_ppgtt(lrc, vm); 1378 1379 if (vm->xef) 1380 xe_drm_client_add_bo(vm->xef->client, lrc->bo); 1381 } 1382 1383 if (xe_device_has_msix(xe)) { 1384 xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR, 1385 xe_memirq_status_ptr(&tile->memirq, hwe)); 1386 xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR, 1387 xe_memirq_source_ptr(&tile->memirq, hwe)); 1388 xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec); 1389 } 1390 1391 if (xe_gt_has_indirect_ring_state(gt)) { 1392 xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, 1393 __xe_lrc_indirect_ring_ggtt_addr(lrc)); 1394 1395 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, 1396 __xe_lrc_ring_ggtt_addr(lrc)); 1397 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0); 1398 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0); 1399 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail); 1400 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL, 1401 RING_CTL_SIZE(lrc->ring.size) | RING_VALID); 1402 } else { 1403 xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); 1404 xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); 1405 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, 
lrc->ring.tail); 1406 xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, 1407 RING_CTL_SIZE(lrc->ring.size) | RING_VALID); 1408 } 1409 1410 if (init_flags & XE_LRC_CREATE_RUNALONE) 1411 xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL, 1412 xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) | 1413 _MASKED_BIT_ENABLE(CTX_CTRL_RUN_ALONE)); 1414 1415 if (init_flags & XE_LRC_CREATE_PXP) 1416 xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL, 1417 xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) | 1418 _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE)); 1419 1420 lrc->ctx_timestamp = 0; 1421 xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); 1422 if (lrc_to_xe(lrc)->info.has_64bit_timestamp) 1423 xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); 1424 1425 if (xe->info.has_asid && vm) 1426 xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid); 1427 1428 lrc->desc = LRC_VALID; 1429 lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); 1430 /* TODO: Priority */ 1431 1432 /* While this appears to have something about privileged batches or 1433 * some such, it really just means PPGTT mode. 1434 */ 1435 if (vm) 1436 lrc->desc |= LRC_PRIVILEGE; 1437 1438 if (GRAPHICS_VERx100(xe) < 1250) { 1439 lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance); 1440 lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class); 1441 } 1442 1443 arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1444 xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable)); 1445 1446 map = __xe_lrc_seqno_map(lrc); 1447 xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); 1448 1449 map = __xe_lrc_start_seqno_map(lrc); 1450 xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); 1451 1452 err = setup_wa_bb(lrc, hwe); 1453 if (err) 1454 goto err_lrc_finish; 1455 1456 err = setup_indirect_ctx(lrc, hwe); 1457 if (err) 1458 goto err_lrc_finish; 1459 1460 return 0; 1461 1462 err_lrc_finish: 1463 xe_lrc_finish(lrc); 1464 return err; 1465 } 1466 1467 /** 1468 * xe_lrc_create - Create a LRC 1469 * @hwe: Hardware Engine 1470 * @vm: The VM (address space) 1471 * @ring_size: LRC ring size 1472 * @msix_vec: MSI-X interrupt vector (for platforms that support it) 1473 * @flags: LRC initialization flags 1474 * 1475 * Allocate and initialize the Logical Ring Context (LRC). 1476 * 1477 * Return pointer to created LRC upon success and an error pointer 1478 * upon failure. 1479 */ 1480 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, 1481 u32 ring_size, u16 msix_vec, u32 flags) 1482 { 1483 struct xe_lrc *lrc; 1484 int err; 1485 1486 lrc = kzalloc(sizeof(*lrc), GFP_KERNEL); 1487 if (!lrc) 1488 return ERR_PTR(-ENOMEM); 1489 1490 err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags); 1491 if (err) { 1492 kfree(lrc); 1493 return ERR_PTR(err); 1494 } 1495 1496 return lrc; 1497 } 1498 1499 /** 1500 * xe_lrc_destroy - Destroy the LRC 1501 * @ref: reference to LRC 1502 * 1503 * Called when ref == 0, release resources held by the Logical Ring Context 1504 * (LRC) and free the LRC memory. 1505 */ 1506 void xe_lrc_destroy(struct kref *ref) 1507 { 1508 struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount); 1509 1510 xe_lrc_finish(lrc); 1511 kfree(lrc); 1512 } 1513 1514 /** 1515 * xe_lrc_update_hwctx_regs_with_address - Re-compute GGTT references within given LRC. 
1516 * @lrc: the &xe_lrc struct instance 1517 */ 1518 void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc) 1519 { 1520 if (xe_lrc_has_indirect_ring_state(lrc)) { 1521 xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, 1522 __xe_lrc_indirect_ring_ggtt_addr(lrc)); 1523 1524 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, 1525 __xe_lrc_ring_ggtt_addr(lrc)); 1526 } else { 1527 xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); 1528 } 1529 } 1530 1531 void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) 1532 { 1533 if (xe_lrc_has_indirect_ring_state(lrc)) 1534 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail); 1535 else 1536 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail); 1537 } 1538 1539 u32 xe_lrc_ring_tail(struct xe_lrc *lrc) 1540 { 1541 if (xe_lrc_has_indirect_ring_state(lrc)) 1542 return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR; 1543 else 1544 return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; 1545 } 1546 1547 static u32 xe_lrc_ring_start(struct xe_lrc *lrc) 1548 { 1549 if (xe_lrc_has_indirect_ring_state(lrc)) 1550 return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START); 1551 else 1552 return xe_lrc_read_ctx_reg(lrc, CTX_RING_START); 1553 } 1554 1555 void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) 1556 { 1557 if (xe_lrc_has_indirect_ring_state(lrc)) 1558 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head); 1559 else 1560 xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); 1561 } 1562 1563 u32 xe_lrc_ring_head(struct xe_lrc *lrc) 1564 { 1565 if (xe_lrc_has_indirect_ring_state(lrc)) 1566 return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR; 1567 else 1568 return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; 1569 } 1570 1571 u32 xe_lrc_ring_space(struct xe_lrc *lrc) 1572 { 1573 const u32 head = xe_lrc_ring_head(lrc); 1574 const u32 tail = lrc->ring.tail; 1575 const u32 size = lrc->ring.size; 1576 1577 return ((head - tail - 1) & (size - 1)) + 1; 1578 } 1579 1580 static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, 1581 const void *data, size_t size) 1582 { 1583 struct xe_device *xe = lrc_to_xe(lrc); 1584 1585 iosys_map_incr(&ring, lrc->ring.tail); 1586 xe_map_memcpy_to(xe, &ring, 0, data, size); 1587 lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); 1588 } 1589 1590 void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) 1591 { 1592 struct xe_device *xe = lrc_to_xe(lrc); 1593 struct iosys_map ring; 1594 u32 rhs; 1595 size_t aligned_size; 1596 1597 xe_assert(xe, IS_ALIGNED(size, 4)); 1598 aligned_size = ALIGN(size, 8); 1599 1600 ring = __xe_lrc_ring_map(lrc); 1601 1602 xe_assert(xe, lrc->ring.tail < lrc->ring.size); 1603 rhs = lrc->ring.size - lrc->ring.tail; 1604 if (size > rhs) { 1605 __xe_lrc_write_ring(lrc, ring, data, rhs); 1606 __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); 1607 } else { 1608 __xe_lrc_write_ring(lrc, ring, data, size); 1609 } 1610 1611 if (aligned_size > size) { 1612 u32 noop = MI_NOOP; 1613 1614 __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); 1615 } 1616 } 1617 1618 u64 xe_lrc_descriptor(struct xe_lrc *lrc) 1619 { 1620 return lrc->desc | xe_lrc_ggtt_addr(lrc); 1621 } 1622 1623 u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) 1624 { 1625 return __xe_lrc_seqno_ggtt_addr(lrc); 1626 } 1627 1628 /** 1629 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence. 1630 * 1631 * Allocate but don't initialize an lrc seqno fence. 
1632 * 1633 * Return: Pointer to the allocated fence or 1634 * negative error pointer on error. 1635 */ 1636 struct dma_fence *xe_lrc_alloc_seqno_fence(void) 1637 { 1638 return xe_hw_fence_alloc(); 1639 } 1640 1641 /** 1642 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence. 1643 * @fence: Pointer to the fence to free. 1644 * 1645 * Frees an lrc seqno fence that hasn't yet been 1646 * initialized. 1647 */ 1648 void xe_lrc_free_seqno_fence(struct dma_fence *fence) 1649 { 1650 xe_hw_fence_free(fence); 1651 } 1652 1653 /** 1654 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence. 1655 * @lrc: Pointer to the lrc. 1656 * @fence: Pointer to the fence to initialize. 1657 * 1658 * Initializes a pre-allocated lrc seqno fence. 1659 * After initialization, the fence is subject to normal 1660 * dma-fence refcounting. 1661 */ 1662 void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence) 1663 { 1664 xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc)); 1665 } 1666 1667 s32 xe_lrc_seqno(struct xe_lrc *lrc) 1668 { 1669 struct iosys_map map = __xe_lrc_seqno_map(lrc); 1670 1671 return xe_map_read32(lrc_to_xe(lrc), &map); 1672 } 1673 1674 s32 xe_lrc_start_seqno(struct xe_lrc *lrc) 1675 { 1676 struct iosys_map map = __xe_lrc_start_seqno_map(lrc); 1677 1678 return xe_map_read32(lrc_to_xe(lrc), &map); 1679 } 1680 1681 u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) 1682 { 1683 return __xe_lrc_start_seqno_ggtt_addr(lrc); 1684 } 1685 1686 u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) 1687 { 1688 return __xe_lrc_parallel_ggtt_addr(lrc); 1689 } 1690 1691 struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) 1692 { 1693 return __xe_lrc_parallel_map(lrc); 1694 } 1695 1696 /** 1697 * xe_lrc_engine_id() - Read engine id value 1698 * @lrc: Pointer to the lrc. 
1699 * 1700 * Returns: context id value 1701 */ 1702 static u32 xe_lrc_engine_id(struct xe_lrc *lrc) 1703 { 1704 struct xe_device *xe = lrc_to_xe(lrc); 1705 struct iosys_map map; 1706 1707 map = __xe_lrc_engine_id_map(lrc); 1708 return xe_map_read32(xe, &map); 1709 } 1710 1711 static int instr_dw(u32 cmd_header) 1712 { 1713 /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ 1714 if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) == 1715 GFXPIPE_SINGLE_DW_CMD(0, 0)) 1716 return 1; 1717 1718 /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */ 1719 if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST) 1720 return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2; 1721 1722 /* Most instructions have the # of dwords (minus 2) in 7:0 */ 1723 return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; 1724 } 1725 1726 static int dump_mi_command(struct drm_printer *p, 1727 struct xe_gt *gt, 1728 u32 *dw, 1729 int remaining_dw) 1730 { 1731 u32 inst_header = *dw; 1732 u32 numdw = instr_dw(inst_header); 1733 u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header); 1734 int num_noop; 1735 1736 /* First check for commands that don't have/use a '# DW' field */ 1737 switch (inst_header & MI_OPCODE) { 1738 case MI_NOOP: 1739 num_noop = 1; 1740 while (num_noop < remaining_dw && 1741 (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) 1742 num_noop++; 1743 drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop); 1744 return num_noop; 1745 1746 case MI_TOPOLOGY_FILTER: 1747 drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header); 1748 return 1; 1749 1750 case MI_BATCH_BUFFER_END: 1751 drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header); 1752 /* Return 'remaining_dw' to consume the rest of the LRC */ 1753 return remaining_dw; 1754 } 1755 1756 /* 1757 * Any remaining commands include a # of dwords. We should make sure 1758 * it doesn't exceed the remaining size of the LRC. 1759 */ 1760 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 1761 numdw = remaining_dw; 1762 1763 switch (inst_header & MI_OPCODE) { 1764 case MI_LOAD_REGISTER_IMM: 1765 drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", 1766 inst_header, (numdw - 1) / 2); 1767 for (int i = 1; i < numdw; i += 2) 1768 drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); 1769 return numdw; 1770 1771 case MI_LOAD_REGISTER_MEM & MI_OPCODE: 1772 drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", 1773 inst_header, 1774 dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "", 1775 dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); 1776 if (numdw == 4) 1777 drm_printf(p, " - %#6x = %#010llx\n", 1778 dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); 1779 else 1780 drm_printf(p, " - %*ph (%s)\n", 1781 (int)sizeof(u32) * (numdw - 1), dw + 1, 1782 numdw < 4 ? "truncated" : "malformed"); 1783 return numdw; 1784 1785 case MI_FORCE_WAKEUP: 1786 drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); 1787 return numdw; 1788 1789 default: 1790 drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", 1791 inst_header, opcode, numdw); 1792 return numdw; 1793 } 1794 } 1795 1796 static int dump_gfxpipe_command(struct drm_printer *p, 1797 struct xe_gt *gt, 1798 u32 *dw, 1799 int remaining_dw) 1800 { 1801 u32 numdw = instr_dw(*dw); 1802 u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); 1803 u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); 1804 u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); 1805 1806 /* 1807 * Make sure we haven't mis-parsed a number of dwords that exceeds the 1808 * remaining size of the LRC. 
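 * If it does, warn and clamp to what is left so the dumper never reads past
 * the end of the captured context image.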
1809 */ 1810 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 1811 numdw = remaining_dw; 1812 1813 switch (*dw & GFXPIPE_MATCH_MASK) { 1814 #define MATCH(cmd) \ 1815 case cmd: \ 1816 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 1817 return numdw 1818 #define MATCH3D(cmd) \ 1819 case CMD_##cmd: \ 1820 drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ 1821 return numdw 1822 1823 MATCH(STATE_BASE_ADDRESS); 1824 MATCH(STATE_SIP); 1825 MATCH(GPGPU_CSR_BASE_ADDRESS); 1826 MATCH(STATE_COMPUTE_MODE); 1827 MATCH3D(3DSTATE_BTD); 1828 MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS); 1829 MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS); 1830 1831 MATCH3D(3DSTATE_VF_STATISTICS); 1832 1833 MATCH(PIPELINE_SELECT); 1834 1835 MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); 1836 MATCH3D(3DSTATE_CLEAR_PARAMS); 1837 MATCH3D(3DSTATE_DEPTH_BUFFER); 1838 MATCH3D(3DSTATE_STENCIL_BUFFER); 1839 MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); 1840 MATCH3D(3DSTATE_VERTEX_BUFFERS); 1841 MATCH3D(3DSTATE_VERTEX_ELEMENTS); 1842 MATCH3D(3DSTATE_INDEX_BUFFER); 1843 MATCH3D(3DSTATE_VF); 1844 MATCH3D(3DSTATE_MULTISAMPLE); 1845 MATCH3D(3DSTATE_CC_STATE_POINTERS); 1846 MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); 1847 MATCH3D(3DSTATE_VS); 1848 MATCH3D(3DSTATE_GS); 1849 MATCH3D(3DSTATE_CLIP); 1850 MATCH3D(3DSTATE_SF); 1851 MATCH3D(3DSTATE_WM); 1852 MATCH3D(3DSTATE_CONSTANT_VS); 1853 MATCH3D(3DSTATE_CONSTANT_GS); 1854 MATCH3D(3DSTATE_CONSTANT_PS); 1855 MATCH3D(3DSTATE_SAMPLE_MASK); 1856 MATCH3D(3DSTATE_CONSTANT_HS); 1857 MATCH3D(3DSTATE_CONSTANT_DS); 1858 MATCH3D(3DSTATE_HS); 1859 MATCH3D(3DSTATE_TE); 1860 MATCH3D(3DSTATE_DS); 1861 MATCH3D(3DSTATE_STREAMOUT); 1862 MATCH3D(3DSTATE_SBE); 1863 MATCH3D(3DSTATE_PS); 1864 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); 1865 MATCH3D(3DSTATE_CPS_POINTERS); 1866 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); 1867 MATCH3D(3DSTATE_BLEND_STATE_POINTERS); 1868 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); 1869 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); 1870 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); 1871 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); 1872 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); 1873 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); 1874 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); 1875 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); 1876 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); 1877 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); 1878 MATCH3D(3DSTATE_VF_INSTANCING); 1879 MATCH3D(3DSTATE_VF_SGVS); 1880 MATCH3D(3DSTATE_VF_TOPOLOGY); 1881 MATCH3D(3DSTATE_WM_CHROMAKEY); 1882 MATCH3D(3DSTATE_PS_BLEND); 1883 MATCH3D(3DSTATE_WM_DEPTH_STENCIL); 1884 MATCH3D(3DSTATE_PS_EXTRA); 1885 MATCH3D(3DSTATE_RASTER); 1886 MATCH3D(3DSTATE_SBE_SWIZ); 1887 MATCH3D(3DSTATE_WM_HZ_OP); 1888 MATCH3D(3DSTATE_VF_COMPONENT_PACKING); 1889 MATCH3D(3DSTATE_VF_SGVS_2); 1890 MATCH3D(3DSTATE_VFG); 1891 MATCH3D(3DSTATE_URB_ALLOC_VS); 1892 MATCH3D(3DSTATE_URB_ALLOC_HS); 1893 MATCH3D(3DSTATE_URB_ALLOC_DS); 1894 MATCH3D(3DSTATE_URB_ALLOC_GS); 1895 MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); 1896 MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); 1897 MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); 1898 MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); 1899 MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); 1900 MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); 1901 MATCH3D(3DSTATE_AMFS); 1902 MATCH3D(3DSTATE_DEPTH_BOUNDS); 1903 MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); 1904 MATCH3D(3DSTATE_CONSTANT_TS_POINTER); 1905 MATCH3D(3DSTATE_MESH_CONTROL); 1906 MATCH3D(3DSTATE_MESH_DISTRIB); 1907 MATCH3D(3DSTATE_TASK_REDISTRIB); 1908 MATCH3D(3DSTATE_MESH_SHADER); 1909 MATCH3D(3DSTATE_MESH_SHADER_DATA); 1910 
MATCH3D(3DSTATE_TASK_CONTROL); 1911 MATCH3D(3DSTATE_TASK_SHADER); 1912 MATCH3D(3DSTATE_TASK_SHADER_DATA); 1913 MATCH3D(3DSTATE_URB_ALLOC_MESH); 1914 MATCH3D(3DSTATE_URB_ALLOC_TASK); 1915 MATCH3D(3DSTATE_CLIP_MESH); 1916 MATCH3D(3DSTATE_SBE_MESH); 1917 MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); 1918 MATCH3D(3DSTATE_COARSE_PIXEL); 1919 1920 MATCH3D(3DSTATE_DRAWING_RECTANGLE); 1921 MATCH3D(3DSTATE_CHROMA_KEY); 1922 MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); 1923 MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); 1924 MATCH3D(3DSTATE_LINE_STIPPLE); 1925 MATCH3D(3DSTATE_AA_LINE_PARAMETERS); 1926 MATCH3D(3DSTATE_MONOFILTER_SIZE); 1927 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); 1928 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); 1929 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); 1930 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); 1931 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); 1932 MATCH3D(3DSTATE_SO_DECL_LIST); 1933 MATCH3D(3DSTATE_SO_BUFFER); 1934 MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); 1935 MATCH3D(3DSTATE_SAMPLE_PATTERN); 1936 MATCH3D(3DSTATE_3D_MODE); 1937 MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); 1938 MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); 1939 MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); 1940 1941 default: 1942 drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", 1943 *dw, pipeline, opcode, subopcode, numdw); 1944 return numdw; 1945 } 1946 } 1947 1948 static int dump_gfx_state_command(struct drm_printer *p, 1949 struct xe_gt *gt, 1950 u32 *dw, 1951 int remaining_dw) 1952 { 1953 u32 numdw = instr_dw(*dw); 1954 u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw); 1955 1956 /* 1957 * Make sure we haven't mis-parsed a number of dwords that exceeds the 1958 * remaining size of the LRC. 1959 */ 1960 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 1961 numdw = remaining_dw; 1962 1963 switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) { 1964 MATCH(STATE_WRITE_INLINE); 1965 1966 default: 1967 drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", 1968 *dw, opcode, numdw); 1969 return numdw; 1970 } 1971 } 1972 1973 void xe_lrc_dump_default(struct drm_printer *p, 1974 struct xe_gt *gt, 1975 enum xe_engine_class hwe_class) 1976 { 1977 u32 *dw; 1978 int remaining_dw, num_dw; 1979 1980 if (!gt->default_lrc[hwe_class]) { 1981 drm_printf(p, "No default LRC for class %d\n", hwe_class); 1982 return; 1983 } 1984 1985 /* 1986 * Skip the beginning of the LRC since it contains the per-process 1987 * hardware status page. 
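 * Everything after the PPHWSP is decoded as a command stream: each
 * instruction header is dispatched on XE_INSTR_CMD_TYPE to the MI, GFXPIPE
 * or GFX_STATE dumper, which returns how many dwords it consumed.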
1988 */ 1989 dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; 1990 remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; 1991 1992 while (remaining_dw > 0) { 1993 if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { 1994 num_dw = dump_mi_command(p, gt, dw, remaining_dw); 1995 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { 1996 num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); 1997 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) { 1998 num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw); 1999 } else { 2000 num_dw = min(instr_dw(*dw), remaining_dw); 2001 drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", 2002 *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), 2003 num_dw); 2004 } 2005 2006 dw += num_dw; 2007 remaining_dw -= num_dw; 2008 } 2009 } 2010 2011 struct instr_state { 2012 u32 instr; 2013 u16 num_dw; 2014 }; 2015 2016 static const struct instr_state xe_hpg_svg_state[] = { 2017 { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, 2018 { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, 2019 { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, 2020 { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, 2021 { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, 2022 { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, 2023 { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, 2024 { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, 2025 { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, 2026 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, 2027 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, 2028 { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, 2029 { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, 2030 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, 2031 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, 2032 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, 2033 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, 2034 { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, 2035 { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, 2036 { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 }, 2037 { .instr = CMD_3DSTATE_SF, .num_dw = 4 }, 2038 { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, 2039 { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, 2040 { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, 2041 { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, 2042 { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, 2043 { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, 2044 { .instr = CMD_3DSTATE_HS, .num_dw = 9 }, 2045 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, 2046 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, 2047 { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, 2048 { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, 2049 { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, 2050 { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, 2051 { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, 2052 { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, 2053 { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, 2054 { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, 2055 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, 2056 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, 2057 { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, 2058 { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, 2059 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, 2060 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, 
	  .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * Wa_14019789679
	 *
	 * If the driver doesn't explicitly emit the SVG instructions while
	 * setting up the default LRC, the context switch will write 0's
	 * (noops) into the LRC memory rather than the expected instruction
	 * headers. Application contexts start out as a copy of the default
	 * LRC, and if they also do not emit specific settings for some SVG
	 * state, then on context restore they'll unintentionally inherit
	 * whatever state setting the previous context had programmed into the
	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
	 * prevent the hardware from resetting that state back to any specific
	 * value).
	 *
	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
	 * since that's a specific state setting that can easily cause GPU
	 * hangs if unintentionally inherited. However, to be safe we'll
	 * continue to emit all of the SVG state since it's best not to leak
	 * any of the state between contexts, even if that leakage is harmless.
	 */
	if (XE_GT_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
	}

	if (!state_table) {
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return cs;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
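		 *
		 * The swap below only changes the instruction header dword
		 * that is emitted; the length field handling and the number
		 * of payload dwords reserved for the instruction stay the
		 * same.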
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		*cs = instr;
		if (!is_single_dw)
			*cs |= (num_dw - 2);

		cs += num_dw;
	}

	return cs;
}

struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start = xe_lrc_ring_start(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->size;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Ring address: 0x%08x\n",
		   snapshot->ring_addr);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
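		/* i carries over from the PPHWSP loop above, so this walks the engine context image */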
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo)
		xe_bo_put(snapshot->lrc_bo);

	kfree(snapshot);
}

static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
{
	u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
	u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
	struct xe_hw_engine *hwe;
	u64 val;

	hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
	if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
			    "Unexpected engine class:instance %d:%d for context utilization\n",
			    class, instance))
		return -1;

	if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
		val = xe_mmio_read64_2x32(&hwe->gt->mmio,
					  RING_CTX_TIMESTAMP(hwe->mmio_base));
	else
		val = xe_mmio_read32(&hwe->gt->mmio,
				     RING_CTX_TIMESTAMP(hwe->mmio_base));

	*reg_ctx_ts = val;

	return 0;
}

/**
 * xe_lrc_update_timestamp() - Update ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Old timestamp value
 *
 * Populate @old_ts with the current saved ctx timestamp, read the new ctx
 * timestamp and update the saved value. With support for active contexts,
 * the calculation may be slightly racy, so follow a read-again logic to
 * ensure that the context is still active before returning the right
 * timestamp.
 *
 * Returns: New ctx timestamp value
 */
u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
{
	u64 lrc_ts, reg_ts;
	u32 engine_id;

	*old_ts = lrc->ctx_timestamp;

	lrc_ts = xe_lrc_ctx_timestamp(lrc);
	/* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
	if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
		lrc->ctx_timestamp = lrc_ts;
		goto done;
	}

	if (lrc_ts == CONTEXT_ACTIVE) {
		engine_id = xe_lrc_engine_id(lrc);
		if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
			lrc->ctx_timestamp = reg_ts;

		/* read lrc again to ensure context is still active */
		lrc_ts = xe_lrc_ctx_timestamp(lrc);
	}

	/*
	 * If context switched out, just use the lrc_ts. Note that this needs
	 * to be a separate if condition.
	 */
	if (lrc_ts != CONTEXT_ACTIVE)
		lrc->ctx_timestamp = lrc_ts;

done:
	trace_xe_lrc_update_timestamp(lrc, *old_ts);

	return lrc->ctx_timestamp;
}

/**
 * xe_lrc_ring_is_idle() - LRC is idle
 * @lrc: Pointer to the lrc.
 *
 * Compare LRC ring head and tail to determine if idle.
 *
 * Return: True if the ring is idle, False otherwise
 */
bool xe_lrc_ring_is_idle(struct xe_lrc *lrc)
{
	return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc);
}
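/*
 * Illustrative sketch only (not part of this file's call graph): a caller
 * that accounts per-context runtime is expected to consume
 * xe_lrc_update_timestamp() roughly as below, accumulating the delta
 * between the previously saved and the freshly sampled timestamp. The
 * "run_ticks" accumulator is a hypothetical name used purely for
 * illustration:
 *
 *	u64 old_ts, new_ts;
 *
 *	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
 *	run_ticks += new_ts - old_ts;
 *
 * The read-again logic above keeps that delta consistent even if the
 * context switches out between the LRC read and the MMIO read.
 */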