// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>
#include <linux/panic.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_ring_ops.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
#include "xe_wa.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

/* Sizes of the fixed single-page regions that accompany the context image */
#define LRC_PPHWSP_SIZE				SZ_4K
#define LRC_INDIRECT_CTX_BO_SIZE		SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

#define LRC_PRIORITY				GENMASK_ULL(10, 9)
#define LRC_PRIORITY_LOW			0
#define LRC_PRIORITY_NORMAL			1
#define LRC_PRIORITY_HIGH			2

/*
 * Layout of the LRC and associated data allocated as
 * lrc->bo:
 *
 *   Region                       Size
 *  +============================+=================================+ <- __xe_lrc_ring_offset()
 *  | Ring                       | ring_size, see                  |
 *  |                            | xe_lrc_init()                   |
 *  +============================+=================================+ <- __xe_lrc_pphwsp_offset()
 *  | PPHWSP (includes SW state) | 4K                              |
 *  +----------------------------+---------------------------------+ <- __xe_lrc_regs_offset()
 *  | Engine Context Image       | n * 4K, see                     |
 *  |                            | xe_gt_lrc_size()                |
 *  +----------------------------+---------------------------------+ <- __xe_lrc_indirect_ring_offset()
 *  | Indirect Ring State Page   | 0 or 4k, see                    |
 *  |                            | XE_LRC_FLAG_INDIRECT_RING_STATE |
 *  +============================+=================================+ <- __xe_lrc_indirect_ctx_offset()
 *  | Indirect Context Page      | 0 or 4k, see                    |
 *  |                            | XE_LRC_FLAG_INDIRECT_CTX        |
 *  +============================+=================================+ <- __xe_lrc_wa_bb_offset()
 *  | WA BB Per Ctx              | 4k                              |
 *  +============================+=================================+ <- xe_bo_size(lrc->bo)
 */

/* Resolve the xe_device an LRC belongs to through the GT of its fence context */
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

/*
 * Report whether contexts of @class on @gt need an indirect context page.
 *
 * True when any of the following holds:
 *  - workaround 16010904313 is active and @class is render or compute,
 *  - configfs provides a mid context-restore batch for @class,
 *  - the ring ops for @class implement emit_aux_table_inv.
 */
static bool
gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (XE_GT_WA(gt, 16010904313) &&
	    (class == XE_ENGINE_CLASS_RENDER ||
	     class == XE_ENGINE_CLASS_COMPUTE))
		return true;

	/* Only the dword count is of interest here, hence the NULL out-pointer */
	if (xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
					       class, NULL))
		return true;

	if (gt->ring_ops[class]->emit_aux_table_inv)
		return true;

	return false;
}

/**
 * xe_gt_lrc_hang_replay_size() - Hang replay size
 * @gt: The GT
 * @class: Hardware engine class
 *
 * Determine size of GPU hang replay state for a GT and hardware engine class.
 *
 * Return: Size of GPU hang replay state
 */
size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size = 0;

	/* Engine context image */
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VERx100(xe) >= 3510)
			size += 7 * SZ_4K;
		else if (GRAPHICS_VER(xe) >= 20)
			size += 3 * SZ_4K;
		else
			size += 13 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		if (GRAPHICS_VERx100(xe) >= 3510)
			size += 5 * SZ_4K;
		else if (GRAPHICS_VER(xe) >= 20)
			size += 2 * SZ_4K;
		else
			size += 13 * SZ_4K;
		break;
	default:
		/* Unknown classes are reported, then sized like the 1-page classes */
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size += 1 * SZ_4K;
	}

	return size;
}

/**
 * xe_gt_lrc_size() - Size of the LRC image for an engine class
 * @gt: The GT
 * @class: Hardware engine class
 *
 * Return: The hang replay size for @class plus the PPHWSP page, plus one
 * indirect ring state page when @gt has indirect ring state.
 */
size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	size_t size = xe_gt_lrc_hang_replay_size(gt, class);

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size + LRC_PPHWSP_SIZE;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - number of NOPs are set in lower bits
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers.
 * They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 7 bits, use additional bytes, to
 *      follow, for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
/*
 * Table-encoding helpers, defined between the prototype and the body so they
 * sit next to the decoder that consumes them:
 *  NOP(x)       - skip entry, bit 7 set, count in the low bits
 *  LRI(c, f)    - MI_LOAD_REGISTER_IMM header: flags in bits 7:6, count in 5:0
 *  REG(x)       - one-byte register offset (dword index), x < 0x200
 *  REG16(x)     - two-byte register offset, high byte carries bit 7 as a
 *                 "more bytes follow" marker, x < 0x10000
 */
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	/* A zero byte terminates the table */
	while (*data) {
		u8 count, flags;

		/* NOP entry: leave @count dwords of @regs untouched */
		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		/* Emit the LRI header; CS-relative MMIO addressing is always set */
		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		/* The do/while below runs at least once, so count must be non-zero */
		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			/*
			 * Accumulate 7 bits per byte, most significant group
			 * first, for as long as bit 7 flags a following byte.
			 */
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/*
			 * Store the register address (offset is a dword index)
			 * and step over the value dword, which is not written.
			 */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

/*
 * Register layout returned by reg_offsets() for copy engines below graphics
 * version 20 and for the remaining non-render classes below version 12.55.
 */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

/*
 * Register layout returned by reg_offsets() for non-render, non-copy classes
 * on graphics version 12.55 and later, below version 20.
 */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

/*
 * Register layout returned by reg_offsets() for the render class below
 * graphics version 12.50.
 */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

/*
 * Register layout returned by reg_offsets() for the render class on graphics
 * version 12.50 up to, but not including, 12.55.
 */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

/*
 * Register layout returned by reg_offsets() for the render class on graphics
 * version 12.55 up to, but not including, 12.70.
 */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

/*
 * Register layout returned by reg_offsets() for the render class on graphics
 * version 12.70 and later, below version 20.
 */
static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

/*
 * Leading part of the register layout shared by all Xe2 tables below. The
 * bracketed numbers are dword indices into the register state: NOP(n)
 * advances by n, an LRI header takes one dword and every register entry
 * takes two (address + value), see set_offsets().
 */
#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */

/* Render class layout returned by reg_offsets() for graphics version >= 20 */
static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x3b] */
	LRI(1, 0),              /* [0x41] */
	REG(0x0c8),             /* [0x42] R_PWR_CLK_STATE */

	0
};

/* Copy class layout returned by reg_offsets() for graphics version >= 20 */
static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	0
};

/*
 * Layout returned by reg_offsets() for the remaining engine classes on
 * graphics version >= 20: the common part only.
 */
static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

/*
 * Layout of the indirect ring state page; empty_lrc_data() decodes it with
 * set_offsets() when the GT has indirect ring state.
 */
static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),                 /* [0x00] */
	LRI(5, POSTED),         /* [0x01] */
	REG(0x034),             /* [0x02] RING_BUFFER_HEAD */
	REG(0x030),             /* [0x04] RING_BUFFER_TAIL */
	REG(0x038),             /* [0x06] RING_BUFFER_START */
	REG(0x048),             /* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),                 /* [0x0c] */
	LRI(9, POSTED),         /* [0x11] */
	REG(0x168),             /* [0x12] BB_ADDR_UDW */
	REG(0x140),             /* [0x14] BB_ADDR */
	REG(0x110),             /* [0x16] BB_STATE */
	REG16(0x588),           /* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1a] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1c] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1e] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x22] BB_STACK_WRITE_PORT */

	NOP(12),                /* [0x24] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

/*
 * Pick the encoded register layout table for @class on @xe. The graphics
 * version checks run from newest to oldest, so the first match wins.
 */
static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void
set_context_control(u32 *regs, struct xe_hw_engine *hwe) 650 { 651 regs[CTX_CONTEXT_CONTROL] = REG_MASKED_FIELD_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | 652 CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); 653 654 if (xe_gt_has_indirect_ring_state(hwe->gt)) 655 regs[CTX_CONTEXT_CONTROL] |= 656 REG_MASKED_FIELD_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); 657 } 658 659 static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) 660 { 661 struct xe_memirq *memirq = >_to_tile(hwe->gt)->memirq; 662 struct xe_device *xe = gt_to_xe(hwe->gt); 663 u8 num_regs; 664 665 if (!xe_device_uses_memirq(xe)) 666 return; 667 668 regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM | 669 MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT; 670 regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; 671 regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); 672 673 num_regs = xe_device_has_msix(xe) ? 3 : 2; 674 regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) | 675 MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; 676 regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; 677 regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe); 678 regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; 679 regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe); 680 681 if (xe_device_has_msix(xe)) { 682 regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr; 683 /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */ 684 } 685 } 686 687 static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc) 688 { 689 return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE; 690 } 691 692 static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) 693 { 694 return 0; 695 } 696 697 u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) 698 { 699 return lrc->ring.size; 700 } 701 702 /* Make the magic macros work */ 703 #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset 704 #define __xe_lrc_regs_offset xe_lrc_regs_offset 705 706 #define LRC_CTX_JOB_TIMESTAMP_OFFSET 512 707 #define 
LRC_ENGINE_ID_PPHWSP_OFFSET 1024 708 #define LRC_PARALLEL_PPHWSP_OFFSET 2048 709 710 #define LRC_SEQNO_OFFSET 0 711 #define LRC_START_SEQNO_OFFSET (LRC_SEQNO_OFFSET + 8) 712 713 u32 xe_lrc_regs_offset(struct xe_lrc *lrc) 714 { 715 return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; 716 } 717 718 /** 719 * xe_lrc_reg_size() - Get size of the LRC registers area within queues 720 * @xe: the &xe_device struct instance 721 * 722 * Returns: Size of the LRC registers area for current platform 723 */ 724 size_t xe_lrc_reg_size(struct xe_device *xe) 725 { 726 if (GRAPHICS_VERx100(xe) >= 1250) 727 return 96 * sizeof(u32); 728 else 729 return 80 * sizeof(u32); 730 } 731 732 /** 733 * xe_lrc_engine_state_size() - Get size of the engine state within LRC 734 * @gt: the &xe_gt struct instance 735 * @class: Hardware engine class 736 * 737 * Returns: Size of the engine state 738 */ 739 size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class) 740 { 741 return xe_gt_lrc_hang_replay_size(gt, class) - xe_lrc_reg_size(gt_to_xe(gt)); 742 } 743 744 static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) 745 { 746 return LRC_SEQNO_OFFSET; 747 } 748 749 static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) 750 { 751 return LRC_START_SEQNO_OFFSET; 752 } 753 754 static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) 755 { 756 /* This is stored in the driver-defined portion of PPHWSP */ 757 return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; 758 } 759 760 static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) 761 { 762 /* The parallel is stored in the driver-defined portion of PPHWSP */ 763 return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; 764 } 765 766 static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc) 767 { 768 return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET; 769 } 770 771 static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) 772 { 773 return __xe_lrc_regs_offset(lrc) + 
CTX_TIMESTAMP * sizeof(u32); 774 } 775 776 static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) 777 { 778 return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32); 779 } 780 781 static u32 __xe_lrc_queue_timestamp_offset(struct xe_lrc *lrc) 782 { 783 return __xe_lrc_regs_offset(lrc) + CTX_QUEUE_TIMESTAMP * sizeof(u32); 784 } 785 786 static u32 __xe_lrc_queue_timestamp_udw_offset(struct xe_lrc *lrc) 787 { 788 return __xe_lrc_regs_offset(lrc) + CTX_QUEUE_TIMESTAMP_UDW * sizeof(u32); 789 } 790 791 static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) 792 { 793 u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - 794 LRC_INDIRECT_RING_STATE_SIZE; 795 796 if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX) 797 offset -= LRC_INDIRECT_CTX_BO_SIZE; 798 799 return offset; 800 } 801 802 static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc) 803 { 804 return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE; 805 } 806 807 static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) 808 { 809 return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; 810 } 811 812 #define DECL_MAP_ADDR_HELPERS(elem, bo_expr) \ 813 static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ 814 { \ 815 struct xe_bo *bo = (bo_expr); \ 816 struct iosys_map map = bo->vmap; \ 817 \ 818 xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \ 819 iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ 820 return map; \ 821 } \ 822 static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ 823 { \ 824 struct xe_bo *bo = (bo_expr); \ 825 \ 826 return xe_bo_ggtt_addr(bo) + __xe_lrc_##elem##_offset(lrc); \ 827 } \ 828 829 DECL_MAP_ADDR_HELPERS(ring, lrc->bo) 830 DECL_MAP_ADDR_HELPERS(pphwsp, lrc->bo) 831 DECL_MAP_ADDR_HELPERS(seqno, lrc->seqno_bo) 832 DECL_MAP_ADDR_HELPERS(regs, lrc->bo) 833 DECL_MAP_ADDR_HELPERS(start_seqno, lrc->seqno_bo) 834 DECL_MAP_ADDR_HELPERS(ctx_job_timestamp, lrc->bo) 835 
DECL_MAP_ADDR_HELPERS(ctx_timestamp, lrc->bo) 836 DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw, lrc->bo) 837 DECL_MAP_ADDR_HELPERS(parallel, lrc->bo) 838 DECL_MAP_ADDR_HELPERS(indirect_ring, lrc->bo) 839 DECL_MAP_ADDR_HELPERS(engine_id, lrc->bo) 840 DECL_MAP_ADDR_HELPERS(queue_timestamp, lrc->bo) 841 DECL_MAP_ADDR_HELPERS(queue_timestamp_udw, lrc->bo) 842 843 #undef DECL_MAP_ADDR_HELPERS 844 845 /** 846 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address 847 * @lrc: Pointer to the lrc. 848 * 849 * Returns: ctx timestamp GGTT address 850 */ 851 u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) 852 { 853 return __xe_lrc_ctx_timestamp_ggtt_addr(lrc); 854 } 855 856 /** 857 * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address 858 * @lrc: Pointer to the lrc. 859 * 860 * Returns: ctx timestamp udw GGTT address 861 */ 862 u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc) 863 { 864 return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); 865 } 866 867 /** 868 * xe_lrc_ctx_timestamp() - Read ctx timestamp value 869 * @lrc: Pointer to the lrc. 870 * 871 * Returns: ctx timestamp value 872 */ 873 static u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) 874 { 875 struct xe_device *xe = lrc_to_xe(lrc); 876 struct iosys_map map; 877 u32 ldw, udw = 0; 878 879 map = __xe_lrc_ctx_timestamp_map(lrc); 880 ldw = xe_map_read32(xe, &map); 881 882 if (xe->info.has_64bit_timestamp) { 883 map = __xe_lrc_ctx_timestamp_udw_map(lrc); 884 udw = xe_map_read32(xe, &map); 885 } 886 887 return (u64)udw << 32 | ldw; 888 } 889 890 /** 891 * xe_lrc_queue_timestamp() - Read queue timestamp value 892 * @lrc: Pointer to the lrc. 
893 * 894 * Returns: queue timestamp value 895 */ 896 static u64 xe_lrc_queue_timestamp(struct xe_lrc *lrc) 897 { 898 struct xe_device *xe = lrc_to_xe(lrc); 899 struct iosys_map map; 900 u32 ldw, udw = 0; 901 902 xe_assert(xe, xe_lrc_is_multi_queue(lrc)); 903 904 map = __xe_lrc_queue_timestamp_map(lrc); 905 ldw = xe_map_read32(xe, &map); 906 907 map = __xe_lrc_queue_timestamp_udw_map(lrc); 908 udw = xe_map_read32(xe, &map); 909 910 return (u64)udw << 32 | ldw; 911 } 912 913 /** 914 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address 915 * @lrc: Pointer to the lrc. 916 * 917 * Returns: ctx timestamp job GGTT address 918 */ 919 u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc) 920 { 921 return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); 922 } 923 924 /** 925 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value 926 * @lrc: Pointer to the lrc. 927 * 928 * Returns: ctx timestamp job value 929 */ 930 u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc) 931 { 932 struct xe_device *xe = lrc_to_xe(lrc); 933 struct iosys_map map; 934 935 map = __xe_lrc_ctx_job_timestamp_map(lrc); 936 return xe_map_read32(xe, &map); 937 } 938 939 u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) 940 { 941 return __xe_lrc_pphwsp_ggtt_addr(lrc); 942 } 943 944 u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc) 945 { 946 if (!xe_lrc_has_indirect_ring_state(lrc)) 947 return 0; 948 949 return __xe_lrc_indirect_ring_ggtt_addr(lrc); 950 } 951 952 static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr) 953 { 954 struct xe_device *xe = lrc_to_xe(lrc); 955 struct iosys_map map; 956 957 map = __xe_lrc_indirect_ring_map(lrc); 958 iosys_map_incr(&map, reg_nr * sizeof(u32)); 959 return xe_map_read32(xe, &map); 960 } 961 962 static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc, 963 int reg_nr, u32 val) 964 { 965 struct xe_device *xe = lrc_to_xe(lrc); 966 struct iosys_map map; 967 968 map = __xe_lrc_indirect_ring_map(lrc); 969 iosys_map_incr(&map, reg_nr * 
sizeof(u32)); 970 xe_map_write32(xe, &map, val); 971 } 972 973 u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) 974 { 975 struct xe_device *xe = lrc_to_xe(lrc); 976 struct iosys_map map; 977 978 map = __xe_lrc_regs_map(lrc); 979 iosys_map_incr(&map, reg_nr * sizeof(u32)); 980 return xe_map_read32(xe, &map); 981 } 982 983 void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) 984 { 985 struct xe_device *xe = lrc_to_xe(lrc); 986 struct iosys_map map; 987 988 map = __xe_lrc_regs_map(lrc); 989 iosys_map_incr(&map, reg_nr * sizeof(u32)); 990 xe_map_write32(xe, &map, val); 991 } 992 993 static void *empty_lrc_data(struct xe_hw_engine *hwe) 994 { 995 struct xe_gt *gt = hwe->gt; 996 void *data; 997 u32 *regs; 998 999 data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL); 1000 if (!data) 1001 return NULL; 1002 1003 /* 1st page: Per-Process of HW status Page */ 1004 regs = data + LRC_PPHWSP_SIZE; 1005 set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe); 1006 set_context_control(regs, hwe); 1007 set_memory_based_intr(regs, hwe); 1008 if (xe_gt_has_indirect_ring_state(gt)) { 1009 regs = data + xe_gt_lrc_size(gt, hwe->class) - 1010 LRC_INDIRECT_RING_STATE_SIZE; 1011 set_offsets(regs, xe2_indirect_ring_state_offsets, hwe); 1012 } 1013 1014 return data; 1015 } 1016 1017 /** 1018 * xe_default_lrc_update_memirq_regs_with_address - Re-compute GGTT references in default LRC 1019 * of given engine. 1020 * @hwe: the &xe_hw_engine struct instance 1021 */ 1022 void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe) 1023 { 1024 struct xe_gt *gt = hwe->gt; 1025 u32 *regs; 1026 1027 if (!gt->default_lrc[hwe->class]) 1028 return; 1029 1030 regs = gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE; 1031 set_memory_based_intr(regs, hwe); 1032 } 1033 1034 /** 1035 * xe_lrc_update_memirq_regs_with_address - Re-compute GGTT references in mem interrupt data 1036 * for given LRC. 
1037 * @lrc: the &xe_lrc struct instance 1038 * @hwe: the &xe_hw_engine struct instance 1039 * @regs: scratch buffer to be used as temporary storage 1040 */ 1041 void xe_lrc_update_memirq_regs_with_address(struct xe_lrc *lrc, struct xe_hw_engine *hwe, 1042 u32 *regs) 1043 { 1044 struct xe_gt *gt = hwe->gt; 1045 struct iosys_map map; 1046 size_t regs_len; 1047 1048 if (!xe_device_uses_memirq(gt_to_xe(gt))) 1049 return; 1050 1051 map = __xe_lrc_regs_map(lrc); 1052 regs_len = xe_lrc_reg_size(gt_to_xe(gt)); 1053 xe_map_memcpy_from(gt_to_xe(gt), regs, &map, 0, regs_len); 1054 set_memory_based_intr(regs, hwe); 1055 xe_map_memcpy_to(gt_to_xe(gt), &map, 0, regs, regs_len); 1056 } 1057 1058 static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) 1059 { 1060 u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); 1061 1062 xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); 1063 xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); 1064 } 1065 1066 static void xe_lrc_finish(struct xe_lrc *lrc) 1067 { 1068 xe_hw_fence_ctx_finish(&lrc->fence_ctx); 1069 xe_bo_unpin_map_no_vm(lrc->bo); 1070 xe_bo_unpin_map_no_vm(lrc->seqno_bo); 1071 } 1072 1073 /* 1074 * wa_bb_setup_utilization() - Write commands to wa bb to assist 1075 * in calculating active context run ticks. 1076 * 1077 * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the 1078 * context, but only gets updated when the context switches out. In order to 1079 * check how long a context has been active before it switches out, two things 1080 * are required: 1081 * 1082 * (1) Determine if the context is running: 1083 * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in 1084 * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is 1085 * initialized. During a query, we just check for this value to determine if the 1086 * context is active. 
 * If the context switched out, it would overwrite this
 * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as
 * the last part of context restore, so reusing this LRC location will not
 * clobber anything.
 *
 * (2) Calculate the time that the context has been active for:
 * The CTX_TIMESTAMP ticks only when the context is active. If a context is
 * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization.
 * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific
 * engine instance. Since we do not know which instance the context is running
 * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and
 * store it in the PPHWSP.
 *
 * Returns the number of dwords written to @batch, 0 on an SR-IOV VF (nothing
 * is emitted there), or -ENOSPC when @max_len is below the 12 dwords of the
 * largest sequence.
 */
#define CONTEXT_ACTIVE 1ULL
static ssize_t setup_utilization_wa(struct xe_lrc *lrc,
				    struct xe_hw_engine *hwe,
				    u32 *batch,
				    size_t max_len)
{
	u32 *cmd = batch;

	if (IS_SRIOV_VF(gt_to_xe(lrc->gt)))
		return 0;

	if (xe_gt_WARN_ON(lrc->gt, max_len < 12))
		return -ENOSPC;

	/* Store the ENGINE_ID register at the engine id slot of the PPHWSP */
	*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
	*cmd++ = ENGINE_ID(0).addr;
	*cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc);
	*cmd++ = 0;

	/* Mark the context as active in the lower timestamp dword */
	*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
	*cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
	*cmd++ = 0;
	*cmd++ = lower_32_bits(CONTEXT_ACTIVE);

	/* With a 64-bit timestamp the upper dword is written as well */
	if (lrc_to_xe(lrc)->info.has_64bit_timestamp) {
		*cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
		*cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
		*cmd++ = 0;
		*cmd++ = upper_32_bits(CONTEXT_ACTIVE);
	}

	return cmd - batch;
}

/*
 * Emit three MI_LOAD_REGISTER_MEM commands that load RING_CTX_TIMESTAMP from
 * the context timestamp location of @lrc; the first two carry MI_LRM_ASYNC.
 *
 * Only emitted when workaround 16010904313 is active and @hwe is a render,
 * compute, copy, video decode or video enhance engine.
 *
 * Returns the number of dwords written to @batch, 0 when the workaround does
 * not apply, or -ENOSPC when @max_len is below the 12 dwords required.
 */
static ssize_t setup_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
				  u32 *batch, size_t max_len)
{
	const u32 ts_addr = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
	u32 *cmd = batch;

	if (!XE_GT_WA(lrc->gt, 16010904313) ||
	    !(hwe->class == XE_ENGINE_CLASS_RENDER ||
	      hwe->class == XE_ENGINE_CLASS_COMPUTE ||
	      hwe->class == XE_ENGINE_CLASS_COPY ||
	      hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE ||
	      hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE))
		return 0;

	if (xe_gt_WARN_ON(lrc->gt, max_len < 12))
		return -ENOSPC;

	*cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO |
		 MI_LRM_ASYNC;
	*cmd++ = RING_CTX_TIMESTAMP(0).addr;
	*cmd++ = ts_addr;
	*cmd++ = 0;

	*cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO |
		 MI_LRM_ASYNC;
	*cmd++ = RING_CTX_TIMESTAMP(0).addr;
	*cmd++ = ts_addr;
	*cmd++ = 0;

	/* The final load is issued without MI_LRM_ASYNC */
	*cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO;
	*cmd++ = RING_CTX_TIMESTAMP(0).addr;
	*cmd++ = ts_addr;
	*cmd++ = 0;

	return cmd - batch;
}

/*
 * Copy the post context-restore batch configured through configfs for the
 * class of @hwe into @batch.
 *
 * Returns the number of dwords copied, 0 when no batch is configured, or
 * -ENOSPC when the batch does not fit in @max_len dwords.
 */
static ssize_t setup_configfs_post_ctx_restore_bb(struct xe_lrc *lrc,
						  struct xe_hw_engine *hwe,
						  u32 *batch, size_t max_len)
{
	struct xe_device *xe = gt_to_xe(lrc->gt);
	const u32 *user_batch;
	u32 *cmd = batch;
	u32 count;

	count = xe_configfs_get_ctx_restore_post_bb(to_pci_dev(xe->drm.dev),
						    hwe->class, &user_batch);
	if (!count)
		return 0;

	if (count > max_len)
		return -ENOSPC;

	/*
	 * This should be used only for tests and validation. Taint the kernel
	 * as anything could be submitted directly in context switches
	 */
	add_taint(TAINT_TEST, LOCKDEP_STILL_OK);

	memcpy(cmd, user_batch, count * sizeof(u32));
	cmd += count;

	return cmd - batch;
}

/*
 * Copy the mid context-restore batch configured through configfs for the
 * class of @hwe into @batch.
 *
 * Returns the number of dwords copied, 0 when no batch is configured, or
 * -ENOSPC when the batch does not fit in @max_len dwords.
 */
static ssize_t setup_configfs_mid_ctx_restore_bb(struct xe_lrc *lrc,
						 struct xe_hw_engine *hwe,
						 u32 *batch, size_t max_len)
{
	struct xe_device *xe = gt_to_xe(lrc->gt);
	const u32 *user_batch;
	u32 *cmd = batch;
	u32 count;

	count = xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
						   hwe->class, &user_batch);
	if (!count)
		return 0;

	if (count > max_len)
		return -ENOSPC;

	/*
	 * This should be used only for tests and validation. Taint the kernel
	 * as anything could be submitted directly in context switches
	 */
	add_taint(TAINT_TEST, LOCKDEP_STILL_OK);

	memcpy(cmd, user_batch, count * sizeof(u32));
	cmd += count;

	return cmd - batch;
}

/*
 * Emit a one-register MI_LOAD_REGISTER_IMM that writes the masked enable of
 * INSTRUCTION_STATE_CACHE_INVALIDATE to CS_DEBUG_MODE2.
 *
 * Only emitted for render engines when workaround 18022495364 is active.
 *
 * Returns the number of dwords written to @batch, 0 when the workaround does
 * not apply, or -ENOSPC when @max_len is below the 3 dwords required.
 */
static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
					       struct xe_hw_engine *hwe,
					       u32 *batch, size_t max_len)
{
	u32 *cmd = batch;

	if (!XE_GT_WA(lrc->gt, 18022495364) ||
	    hwe->class != XE_ENGINE_CLASS_RENDER)
		return 0;

	if (xe_gt_WARN_ON(lrc->gt, max_len < 3))
		return -ENOSPC;

	*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_LRM_CS_MMIO | MI_LRI_NUM_REGS(1);
	*cmd++ = CS_DEBUG_MODE2(0).addr;
	*cmd++ = REG_MASKED_FIELD_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);

	return cmd - batch;
}

static ssize_t setup_invalidate_auxccs_wa(struct xe_lrc *lrc,
					  struct xe_hw_engine *hwe,
					  u32 *batch, size_t max_len)
{
	struct xe_gt *gt = lrc->gt;
	u32 *(*emit)(struct xe_gt *gt, u32 *cmd) =
		gt->ring_ops[hwe->class]->emit_aux_table_inv;

	if (!emit)
		return 0;

	if (xe_gt_WARN_ON(gt, max_len < 8))
		return -ENOSPC;

return emit(gt, batch) - batch; 1263 } 1264 1265 struct bo_setup { 1266 ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, 1267 u32 *batch, size_t max_size); 1268 }; 1269 1270 struct bo_setup_state { 1271 /* Input: */ 1272 struct xe_lrc *lrc; 1273 struct xe_hw_engine *hwe; 1274 size_t max_size; 1275 size_t reserve_dw; 1276 unsigned int offset; 1277 const struct bo_setup *funcs; 1278 unsigned int num_funcs; 1279 1280 /* State: */ 1281 u32 *buffer; 1282 u32 *ptr; 1283 unsigned int written; 1284 }; 1285 1286 static int setup_bo(struct bo_setup_state *state) 1287 { 1288 ssize_t remain; 1289 1290 if (state->lrc->bo->vmap.is_iomem) { 1291 xe_gt_assert(state->hwe->gt, state->buffer); 1292 state->ptr = state->buffer; 1293 } else { 1294 state->ptr = state->lrc->bo->vmap.vaddr + state->offset; 1295 } 1296 1297 remain = state->max_size / sizeof(u32); 1298 1299 for (size_t i = 0; i < state->num_funcs; i++) { 1300 ssize_t len = state->funcs[i].setup(state->lrc, state->hwe, 1301 state->ptr, remain); 1302 1303 remain -= len; 1304 1305 /* 1306 * Caller has asked for at least reserve_dw to remain unused. 1307 */ 1308 if (len < 0 || 1309 xe_gt_WARN_ON(state->lrc->gt, remain < state->reserve_dw)) 1310 goto fail; 1311 1312 state->ptr += len; 1313 state->written += len; 1314 } 1315 1316 return 0; 1317 1318 fail: 1319 return -ENOSPC; 1320 } 1321 1322 static void finish_bo(struct bo_setup_state *state) 1323 { 1324 if (!state->lrc->bo->vmap.is_iomem) 1325 return; 1326 1327 xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, 1328 state->offset, state->buffer, 1329 state->written * sizeof(u32)); 1330 } 1331 1332 /** 1333 * xe_lrc_setup_wa_bb_with_scratch - Execute all wa bb setup callbacks. 
* @lrc: the &xe_lrc struct instance
 * @hwe: the &xe_hw_engine struct instance
 * @scratch: preallocated scratch buffer for temporary storage
 * Return: 0 on success, negative error code on failure
 */
int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *scratch)
{
	static const struct bo_setup funcs[] = {
		{ .setup = setup_timestamp_wa },
		{ .setup = setup_invalidate_state_cache_wa },
		{ .setup = setup_utilization_wa },
		{ .setup = setup_configfs_post_ctx_restore_bb },
	};
	struct bo_setup_state state = {
		.lrc = lrc,
		.hwe = hwe,
		.max_size = LRC_WA_BB_SIZE,
		.buffer = scratch,
		/* One dword is kept free for the MI_BATCH_BUFFER_END below */
		.reserve_dw = 1,
		.offset = __xe_lrc_wa_bb_offset(lrc),
		.funcs = funcs,
		.num_funcs = ARRAY_SIZE(funcs),
	};
	int ret;

	ret = setup_bo(&state);
	if (ret)
		return ret;

	/* Terminate the batch in the dword reserved above */
	*state.ptr++ = MI_BATCH_BUFFER_END;
	state.written++;

	finish_bo(&state);

	/*
	 * Point the per-context BB register at the batch.
	 * NOTE(review): the "+ 1" presumably sets a valid/enable bit in bit 0
	 * of the pointer - confirm against the register definition.
	 */
	xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
			     xe_bo_ggtt_addr(lrc->bo) + state.offset + 1);

	return 0;
}

/*
 * Build the WA BB for @lrc, allocating a temporary LRC_WA_BB_SIZE scratch
 * buffer when lrc->bo is mapped as I/O memory.
 *
 * Return: 0 on success, -ENOMEM if the scratch allocation fails, or the error
 * from xe_lrc_setup_wa_bb_with_scratch().
 */
static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
	u32 *buf = NULL;
	int ret;

	if (lrc->bo->vmap.is_iomem) {
		buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;
	}

	ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf);

	/* buf is still NULL in the non-iomem case; kfree() is then called on NULL */
	kfree(buf);

	return ret;
}

/*
 * Build the indirect context batch for @lrc and program CTX_CS_INDIRECT_CTX
 * with its GGTT address and size. Does nothing unless the LRC has
 * XE_LRC_FLAG_INDIRECT_CTX set.
 *
 * Render and compute engines run the rcs_funcs callbacks; every other class
 * runs xcs_funcs.
 *
 * Return: 0 on success (or when nothing needs to be done), -ENOMEM if the
 * staging buffer cannot be allocated, or the error from setup_bo().
 */
static int
setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
	static const struct bo_setup rcs_funcs[] = {
		{ .setup = setup_timestamp_wa },
		{ .setup = setup_invalidate_auxccs_wa },
		{ .setup = setup_configfs_mid_ctx_restore_bb },
	};
	static const struct bo_setup xcs_funcs[] = {
		{ .setup = setup_invalidate_auxccs_wa },
		{ .setup = setup_configfs_mid_ctx_restore_bb },
	};
	struct bo_setup_state state = {
		.lrc = lrc,
		.hwe = hwe,
		.max_size = (63 * 64) /* max 63 cachelines */,
		.buffer = NULL,
		.offset = __xe_lrc_indirect_ctx_offset(lrc),
	};
	int ret;

	if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX))
		return 0;

	if (hwe->class == XE_ENGINE_CLASS_RENDER ||
	    hwe->class == XE_ENGINE_CLASS_COMPUTE) {
		state.funcs = rcs_funcs;
		state.num_funcs = ARRAY_SIZE(rcs_funcs);
	} else {
		state.funcs = xcs_funcs;
		state.num_funcs = ARRAY_SIZE(xcs_funcs);
	}

	/* Both branches above assign state.funcs, so this only guards future edits */
	if (xe_gt_WARN_ON(lrc->gt, !state.funcs))
		return 0;

	/* Stage in system memory when the bo is mapped as I/O memory */
	if (lrc->bo->vmap.is_iomem) {
		state.buffer = kmalloc(state.max_size, GFP_KERNEL);
		if (!state.buffer)
			return -ENOMEM;
	}

	ret = setup_bo(&state);
	if (ret) {
		kfree(state.buffer);
		return ret;
	}

	/*
	 * Align to 64B cacheline so there's no garbage at the end for CS to
	 * execute: size for indirect ctx must be a multiple of 64.
	 */
	/* state.written counts dwords, so a multiple of 16 dwords is 64 bytes */
	while (state.written & 0xf) {
		*state.ptr++ = MI_NOOP;
		state.written++;
	}

	finish_bo(&state);
	kfree(state.buffer);

	/*
	 * Enable INDIRECT_CTX leaving INDIRECT_CTX_OFFSET at its default: it
	 * varies per engine class, but the default is good enough
	 */
	xe_lrc_write_ctx_reg(lrc,
			     CTX_CS_INDIRECT_CTX,
			     (xe_bo_ggtt_addr(lrc->bo) + state.offset) |
			     /* Size in CLs. */
			     (state.written * sizeof(u32) / 64));

	return 0;
}

/*
 * Translate a multi queue priority into the value stored in the LRC_PRIORITY
 * descriptor field. The value is returned unchanged; the assert only checks
 * that it lies within the LOW..HIGH range.
 */
static u8 xe_multi_queue_prio_to_lrc(struct xe_lrc *lrc, enum xe_multi_queue_priority priority)
{
	struct xe_device *xe = gt_to_xe(lrc->gt);

	xe_assert(xe, (priority >= XE_MULTI_QUEUE_PRIORITY_LOW &&
		       priority <= XE_MULTI_QUEUE_PRIORITY_HIGH));

	/* xe_multi_queue_priority is directly mapped to LRC priority values */
	return priority;
}

/**
 * xe_lrc_set_multi_queue_priority() - Set multi queue priority in LRC
 * @lrc: Logical Ring Context
 * @priority: Multi queue priority of the exec queue
 *
 * Convert @priority to LRC multi queue priority and update the @lrc descriptor
 */
void xe_lrc_set_multi_queue_priority(struct xe_lrc *lrc, enum xe_multi_queue_priority priority)
{
	/* Clear the old priority field before inserting the new value */
	lrc->desc &= ~LRC_PRIORITY;
	lrc->desc |= FIELD_PREP(LRC_PRIORITY, xe_multi_queue_prio_to_lrc(lrc, priority));
}

/*
 * Fill in the context image and software state of an LRC whose backing
 * objects have already been allocated by xe_lrc_init().
 *
 * In order, this populates the PPHWSP and context image, programs the PPGTT
 * (when @vm is given), the MSI-X report pointers, the ring registers, the
 * context control flags requested in @init_flags, the timestamps and the
 * ASID, builds lrc->desc, writes the initial commands into the ring, seeds
 * the seqno slots, and finally builds the WA BB and the indirect context
 * batch.
 *
 * Return: 0 on success, -ENOMEM if the empty LRC template cannot be
 * allocated, or the error from setup_wa_bb() / setup_indirect_ctx().
 */
static int xe_lrc_ctx_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm,
			   void *replay_state, u16 msix_vec, u32 init_flags)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	u32 arb_enable;
	u32 state_cache_perf_fix[3];
	int err;

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values. If there's already a primed default_lrc, just copy it, otherwise
	 * it's the early submission to record the lrc: build a new empty one from
	 * scratch.
	 */
	/*
	 * NOTE(review): this branch is also taken when only replay_state is
	 * set, yet it reads gt->default_lrc[hwe->class] unconditionally -
	 * presumably a default LRC always exists by the time a replay state
	 * can be supplied; confirm against callers.
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (gt->default_lrc[hwe->class] || replay_state) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 lrc->size - LRC_PPHWSP_SIZE);
		/* The replay state overwrites the start of the copied image */
		if (replay_state)
			xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
					 replay_state, lrc->replay_size);
	} else {
		void *init_data = empty_lrc_data(hwe);

		if (!init_data) {
			return -ENOMEM;
		}

		xe_map_memcpy_to(xe, &map, 0, init_data, lrc->size);
		kfree(init_data);
	}

	if (vm)
		xe_lrc_set_ppgtt(lrc, vm);

	if (xe_device_has_msix(xe)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
				     xe_memirq_status_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
				     xe_memirq_source_ptr(&tile->memirq, hwe));
		/* The vector is placed in both the upper and lower 16 bits */
		xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);

		/* Match head and tail pointers */
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));

		/* Match head and tail pointers */
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);

		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	/* Optional context control bits are OR-ed into the current value */
	if (init_flags & XE_LRC_CREATE_RUNALONE)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     REG_MASKED_FIELD_ENABLE(CTX_CTRL_RUN_ALONE));

	if (init_flags & XE_LRC_CREATE_PXP)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     REG_MASKED_FIELD_ENABLE(CTX_CTRL_PXP_ENABLE));

	/* Start both the software copy and the context image timestamp at zero */
	lrc->ctx_timestamp = 0;
	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
	if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
		xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);

	/*
	 * Note: It's possible that this LRC may belong to an exec_queue that is
	 * not part of a multi-queue group. That said, it doesn't hurt to set
	 * this field anyways since any class that supports multi-queue will
	 * have these LRC fields defined.
	 */
	if (xe_gt_supports_multi_queue(gt, hwe->class)) {
		lrc->queue_timestamp = 0;
		xe_lrc_write_ctx_reg(lrc, CTX_QUEUE_TIMESTAMP, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_QUEUE_TIMESTAMP_UDW, 0);
	}

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	/* Engine class/instance are only placed in the descriptor before 12.50 */
	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	/* First command in the ring turns arbitration on */
	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	if (init_flags & XE_LRC_DISABLE_STATE_CACHE_PERF_FIX) {
		state_cache_perf_fix[0] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
		state_cache_perf_fix[1] = COMMON_SLICE_CHICKEN3.addr;
		state_cache_perf_fix[2] = REG_MASKED_FIELD_ENABLE(DISABLE_STATE_CACHE_PERF_FIX);
		xe_lrc_write_ring(lrc, state_cache_perf_fix, sizeof(state_cache_perf_fix));
	}

	/* Seed both seqno slots with the value just before the next seqno */
	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	err = setup_wa_bb(lrc, hwe);
	if (err)
		return err;

	err = setup_indirect_ctx(lrc, hwe);

	return err;
}

/*
 * Initialize a freshly allocated LRC: set up its software fields, create and
 * pin the main bo (ring + context image + WA BB, plus the indirect context
 * page when needed) and the seqno bo, initialize the fence context and run
 * xe_lrc_ctx_init().
 *
 * Return: 0 on success or a negative error code. If the first bo creation
 * fails the error is returned directly; later failures go through
 * xe_lrc_finish() first.
 */
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm,
		       void *replay_state, u32 ring_size, u16 msix_vec, u32 init_flags)
{
	struct xe_gt *gt = hwe->gt;
	const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class);
	u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_bo *bo;
	u32 bo_flags;
	int err;

	kref_init(&lrc->refcount);
	lrc->gt = gt;
	lrc->replay_size = xe_gt_lrc_hang_replay_size(gt, hwe->class);
	lrc->size = lrc_size;
	lrc->flags = 0;
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	/* The indirect context page is only allocated when it will be used */
	if (gt_engine_needs_indirect_ctx(gt, hwe->class)) {
		lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX;
		bo_size += LRC_INDIRECT_CTX_BO_SIZE;
	}

	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
		   XE_BO_FLAG_GGTT_INVALIDATE;

	if ((vm && vm->xef) || init_flags & XE_LRC_CREATE_USER_CTX) /* userspace */
		bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM;

	bo = xe_bo_create_pin_map_novm(xe, tile, bo_size,
				       ttm_bo_type_kernel,
				       bo_flags, false);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	lrc->bo = bo;

	/* Separate page-sized bo in system memory, stored as lrc->seqno_bo */
	bo = xe_bo_create_pin_map_novm(xe, tile, PAGE_SIZE,
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE |
				       XE_BO_FLAG_SYSTEM, false);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		goto err_lrc_finish;
	}
	lrc->seqno_bo = bo;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	err = xe_lrc_ctx_init(lrc, hwe, vm, replay_state, msix_vec, init_flags);
	if (err)
		goto err_lrc_finish;

	/* Register the bo with the DRM client when the VM belongs to a file */
	if (vm && vm->xef)
		xe_drm_client_add_bo(vm->xef->client, lrc->bo);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @replay_state: GPU hang replay state
 * @ring_size: LRC ring size
 * @msix_vec: MSI-X interrupt vector (for platforms that support it)
 * @flags: LRC initialization flags
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return pointer to created LRC upon success and an error pointer
 * upon failure.
1715 */ 1716 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, 1717 void *replay_state, u32 ring_size, u16 msix_vec, u32 flags) 1718 { 1719 struct xe_lrc *lrc; 1720 int err; 1721 1722 lrc = kzalloc_obj(*lrc); 1723 if (!lrc) 1724 return ERR_PTR(-ENOMEM); 1725 1726 err = xe_lrc_init(lrc, hwe, vm, replay_state, ring_size, msix_vec, flags); 1727 if (err) { 1728 kfree(lrc); 1729 return ERR_PTR(err); 1730 } 1731 1732 return lrc; 1733 } 1734 1735 /** 1736 * xe_lrc_destroy - Destroy the LRC 1737 * @ref: reference to LRC 1738 * 1739 * Called when ref == 0, release resources held by the Logical Ring Context 1740 * (LRC) and free the LRC memory. 1741 */ 1742 void xe_lrc_destroy(struct kref *ref) 1743 { 1744 struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount); 1745 1746 xe_lrc_finish(lrc); 1747 kfree(lrc); 1748 } 1749 1750 /** 1751 * xe_lrc_update_hwctx_regs_with_address - Re-compute GGTT references within given LRC. 1752 * @lrc: the &xe_lrc struct instance 1753 */ 1754 void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc) 1755 { 1756 if (xe_lrc_has_indirect_ring_state(lrc)) { 1757 xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, 1758 __xe_lrc_indirect_ring_ggtt_addr(lrc)); 1759 1760 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, 1761 __xe_lrc_ring_ggtt_addr(lrc)); 1762 } else { 1763 xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); 1764 } 1765 } 1766 1767 void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) 1768 { 1769 if (xe_lrc_has_indirect_ring_state(lrc)) 1770 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail); 1771 else 1772 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail); 1773 } 1774 1775 u32 xe_lrc_ring_tail(struct xe_lrc *lrc) 1776 { 1777 if (xe_lrc_has_indirect_ring_state(lrc)) 1778 return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR; 1779 else 1780 return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; 1781 } 1782 1783 static u32 
xe_lrc_ring_start(struct xe_lrc *lrc) 1784 { 1785 if (xe_lrc_has_indirect_ring_state(lrc)) 1786 return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START); 1787 else 1788 return xe_lrc_read_ctx_reg(lrc, CTX_RING_START); 1789 } 1790 1791 void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) 1792 { 1793 if (xe_lrc_has_indirect_ring_state(lrc)) 1794 xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head); 1795 else 1796 xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); 1797 } 1798 1799 u32 xe_lrc_ring_head(struct xe_lrc *lrc) 1800 { 1801 if (xe_lrc_has_indirect_ring_state(lrc)) 1802 return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR; 1803 else 1804 return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; 1805 } 1806 1807 u32 xe_lrc_ring_space(struct xe_lrc *lrc) 1808 { 1809 const u32 head = xe_lrc_ring_head(lrc); 1810 const u32 tail = lrc->ring.tail; 1811 const u32 size = lrc->ring.size; 1812 1813 return ((head - tail - 1) & (size - 1)) + 1; 1814 } 1815 1816 static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, 1817 const void *data, size_t size) 1818 { 1819 struct xe_device *xe = lrc_to_xe(lrc); 1820 1821 iosys_map_incr(&ring, lrc->ring.tail); 1822 xe_map_memcpy_to(xe, &ring, 0, data, size); 1823 lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); 1824 } 1825 1826 void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) 1827 { 1828 struct xe_device *xe = lrc_to_xe(lrc); 1829 struct iosys_map ring; 1830 u32 rhs; 1831 size_t aligned_size; 1832 1833 xe_assert(xe, IS_ALIGNED(size, 4)); 1834 aligned_size = ALIGN(size, 8); 1835 1836 ring = __xe_lrc_ring_map(lrc); 1837 1838 xe_assert(xe, lrc->ring.tail < lrc->ring.size); 1839 rhs = lrc->ring.size - lrc->ring.tail; 1840 if (size > rhs) { 1841 __xe_lrc_write_ring(lrc, ring, data, rhs); 1842 __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); 1843 } else { 1844 __xe_lrc_write_ring(lrc, ring, data, size); 1845 } 1846 1847 if 
(aligned_size > size) { 1848 u32 noop = MI_NOOP; 1849 1850 __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); 1851 } 1852 } 1853 1854 u64 xe_lrc_descriptor(struct xe_lrc *lrc) 1855 { 1856 return lrc->desc | xe_lrc_ggtt_addr(lrc); 1857 } 1858 1859 u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) 1860 { 1861 return __xe_lrc_seqno_ggtt_addr(lrc); 1862 } 1863 1864 /** 1865 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence. 1866 * 1867 * Allocate but don't initialize an lrc seqno fence. 1868 * 1869 * Return: Pointer to the allocated fence or 1870 * negative error pointer on error. 1871 */ 1872 struct dma_fence *xe_lrc_alloc_seqno_fence(void) 1873 { 1874 return xe_hw_fence_alloc(); 1875 } 1876 1877 /** 1878 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence. 1879 * @fence: Pointer to the fence to free. 1880 * 1881 * Frees an lrc seqno fence that hasn't yet been 1882 * initialized. 1883 */ 1884 void xe_lrc_free_seqno_fence(struct dma_fence *fence) 1885 { 1886 xe_hw_fence_free(fence); 1887 } 1888 1889 /** 1890 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence. 1891 * @lrc: Pointer to the lrc. 1892 * @fence: Pointer to the fence to initialize. 1893 * 1894 * Initializes a pre-allocated lrc seqno fence. 1895 * After initialization, the fence is subject to normal 1896 * dma-fence refcounting. 
1897 */ 1898 void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence) 1899 { 1900 xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc)); 1901 } 1902 1903 s32 xe_lrc_seqno(struct xe_lrc *lrc) 1904 { 1905 struct iosys_map map = __xe_lrc_seqno_map(lrc); 1906 1907 return xe_map_read32(lrc_to_xe(lrc), &map); 1908 } 1909 1910 s32 xe_lrc_start_seqno(struct xe_lrc *lrc) 1911 { 1912 struct iosys_map map = __xe_lrc_start_seqno_map(lrc); 1913 1914 return xe_map_read32(lrc_to_xe(lrc), &map); 1915 } 1916 1917 u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) 1918 { 1919 return __xe_lrc_start_seqno_ggtt_addr(lrc); 1920 } 1921 1922 u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) 1923 { 1924 return __xe_lrc_parallel_ggtt_addr(lrc); 1925 } 1926 1927 struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) 1928 { 1929 return __xe_lrc_parallel_map(lrc); 1930 } 1931 1932 /** 1933 * xe_lrc_engine_id() - Read engine id value 1934 * @lrc: Pointer to the lrc. 1935 * 1936 * Returns: context id value 1937 */ 1938 static u32 xe_lrc_engine_id(struct xe_lrc *lrc) 1939 { 1940 struct xe_device *xe = lrc_to_xe(lrc); 1941 struct iosys_map map; 1942 1943 map = __xe_lrc_engine_id_map(lrc); 1944 return xe_map_read32(xe, &map); 1945 } 1946 1947 static int instr_dw(u32 cmd_header) 1948 { 1949 /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ 1950 if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) == 1951 GFXPIPE_SINGLE_DW_CMD(0, 0)) 1952 return 1; 1953 1954 /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */ 1955 if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST) 1956 return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2; 1957 1958 /* Most instructions have the # of dwords (minus 2) in 7:0 */ 1959 return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; 1960 } 1961 1962 static int dump_mi_command(struct drm_printer *p, 1963 struct xe_gt *gt, 1964 u32 *start, 1965 u32 *dw, 1966 int remaining_dw) 1967 { 1968 u32 
inst_header = *dw; 1969 u32 numdw = instr_dw(inst_header); 1970 u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header); 1971 int num_noop; 1972 1973 /* First check for commands that don't have/use a '# DW' field */ 1974 switch (inst_header & MI_OPCODE) { 1975 case MI_NOOP: 1976 num_noop = 1; 1977 while (num_noop < remaining_dw && 1978 (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) 1979 num_noop++; 1980 drm_printf(p, "LRC[%#5tx] = [%#010x] MI_NOOP (%d dwords)\n", 1981 dw - num_noop - start, inst_header, num_noop); 1982 return num_noop; 1983 1984 case MI_TOPOLOGY_FILTER: 1985 drm_printf(p, "LRC[%#5tx] = [%#010x] MI_TOPOLOGY_FILTER\n", 1986 dw - start, inst_header); 1987 return 1; 1988 1989 case MI_BATCH_BUFFER_END: 1990 drm_printf(p, "LRC[%#5tx] = [%#010x] MI_BATCH_BUFFER_END\n", 1991 dw - start, inst_header); 1992 /* Return 'remaining_dw' to consume the rest of the LRC */ 1993 return remaining_dw; 1994 } 1995 1996 /* 1997 * Any remaining commands include a # of dwords. We should make sure 1998 * it doesn't exceed the remaining size of the LRC. 1999 */ 2000 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 2001 numdw = remaining_dw; 2002 2003 switch (inst_header & MI_OPCODE) { 2004 case MI_LOAD_REGISTER_IMM: 2005 drm_printf(p, "LRC[%#5tx] = [%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", 2006 dw - start, inst_header, (numdw - 1) / 2); 2007 for (int i = 1; i < numdw; i += 2) 2008 drm_printf(p, "LRC[%#5tx] = - %#6x = %#010x\n", 2009 &dw[i] - start, dw[i], dw[i + 1]); 2010 return numdw; 2011 2012 case MI_LOAD_REGISTER_MEM & MI_OPCODE: 2013 drm_printf(p, "LRC[%#5tx] = [%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", 2014 dw - start, inst_header, 2015 dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "", 2016 dw[0] & MI_LRM_USE_GGTT ? 
"USE_GGTT " : ""); 2017 if (numdw == 4) 2018 drm_printf(p, "LRC[%#5tx] = - %#6x = %#010llx\n", 2019 dw - start, 2020 dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); 2021 else 2022 drm_printf(p, "LRC[%#5tx] = - %*ph (%s)\n", 2023 dw - start, (int)sizeof(u32) * (numdw - 1), 2024 dw + 1, numdw < 4 ? "truncated" : "malformed"); 2025 return numdw; 2026 2027 case MI_FORCE_WAKEUP: 2028 drm_printf(p, "LRC[%#5tx] = [%#010x] MI_FORCE_WAKEUP\n", 2029 dw - start, inst_header); 2030 return numdw; 2031 2032 default: 2033 drm_printf(p, "LRC[%#5tx] = [%#010x] unknown MI opcode %#x, likely %d dwords\n", 2034 dw - start, inst_header, opcode, numdw); 2035 return numdw; 2036 } 2037 } 2038 2039 static int dump_gfxpipe_command(struct drm_printer *p, 2040 struct xe_gt *gt, 2041 u32 *start, 2042 u32 *dw, 2043 int remaining_dw) 2044 { 2045 u32 numdw = instr_dw(*dw); 2046 u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); 2047 u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); 2048 u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); 2049 2050 /* 2051 * Make sure we haven't mis-parsed a number of dwords that exceeds the 2052 * remaining size of the LRC. 
2053 */ 2054 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 2055 numdw = remaining_dw; 2056 2057 switch (*dw & GFXPIPE_MATCH_MASK) { 2058 #define MATCH(cmd) \ 2059 case cmd: \ 2060 drm_printf(p, "LRC[%#5tx] = [%#010x] " #cmd " (%d dwords)\n", \ 2061 dw - start, *dw, numdw); \ 2062 return numdw 2063 #define MATCH3D(cmd) \ 2064 case CMD_##cmd: \ 2065 drm_printf(p, "LRC[%#5tx] = [%#010x] " #cmd " (%d dwords)\n", \ 2066 dw - start, *dw, numdw); \ 2067 return numdw 2068 2069 MATCH(STATE_BASE_ADDRESS); 2070 MATCH(STATE_SIP); 2071 MATCH(GPGPU_CSR_BASE_ADDRESS); 2072 MATCH(STATE_COMPUTE_MODE); 2073 MATCH3D(3DSTATE_BTD); 2074 MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS); 2075 MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS); 2076 2077 MATCH3D(3DSTATE_VF_STATISTICS); 2078 2079 MATCH(PIPELINE_SELECT); 2080 2081 MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); 2082 MATCH3D(3DSTATE_CUSTOM_SAMPLE_PATTERN); 2083 MATCH3D(3DSTATE_CLEAR_PARAMS); 2084 MATCH3D(3DSTATE_DEPTH_BUFFER); 2085 MATCH3D(3DSTATE_STENCIL_BUFFER); 2086 MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); 2087 MATCH3D(3DSTATE_VERTEX_BUFFERS); 2088 MATCH3D(3DSTATE_VERTEX_ELEMENTS); 2089 MATCH3D(3DSTATE_INDEX_BUFFER); 2090 MATCH3D(3DSTATE_VF); 2091 MATCH3D(3DSTATE_MULTISAMPLE); 2092 MATCH3D(3DSTATE_CC_STATE_POINTERS); 2093 MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); 2094 MATCH3D(3DSTATE_VS); 2095 MATCH3D(3DSTATE_GS); 2096 MATCH3D(3DSTATE_CLIP); 2097 MATCH3D(3DSTATE_SF); 2098 MATCH3D(3DSTATE_WM); 2099 MATCH3D(3DSTATE_CONSTANT_VS); 2100 MATCH3D(3DSTATE_CONSTANT_GS); 2101 MATCH3D(3DSTATE_CONSTANT_PS); 2102 MATCH3D(3DSTATE_SAMPLE_MASK); 2103 MATCH3D(3DSTATE_CONSTANT_HS); 2104 MATCH3D(3DSTATE_CONSTANT_DS); 2105 MATCH3D(3DSTATE_HS); 2106 MATCH3D(3DSTATE_TE); 2107 MATCH3D(3DSTATE_DS); 2108 MATCH3D(3DSTATE_STREAMOUT); 2109 MATCH3D(3DSTATE_SBE); 2110 MATCH3D(3DSTATE_PS); 2111 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); 2112 MATCH3D(3DSTATE_CPS_POINTERS); 2113 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); 2114 MATCH3D(3DSTATE_BLEND_STATE_POINTERS); 2115 
MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); 2116 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); 2117 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); 2118 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); 2119 MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); 2120 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); 2121 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); 2122 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); 2123 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); 2124 MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); 2125 MATCH3D(3DSTATE_VF_INSTANCING); 2126 MATCH3D(3DSTATE_VF_SGVS); 2127 MATCH3D(3DSTATE_VF_TOPOLOGY); 2128 MATCH3D(3DSTATE_WM_CHROMAKEY); 2129 MATCH3D(3DSTATE_PS_BLEND); 2130 MATCH3D(3DSTATE_WM_DEPTH_STENCIL); 2131 MATCH3D(3DSTATE_PS_EXTRA); 2132 MATCH3D(3DSTATE_RASTER); 2133 MATCH3D(3DSTATE_SBE_SWIZ); 2134 MATCH3D(3DSTATE_WM_HZ_OP); 2135 MATCH3D(3DSTATE_VF_COMPONENT_PACKING); 2136 MATCH3D(3DSTATE_VF_SGVS_2); 2137 MATCH3D(3DSTATE_VFG); 2138 MATCH3D(3DSTATE_URB_ALLOC_VS); 2139 MATCH3D(3DSTATE_URB_ALLOC_HS); 2140 MATCH3D(3DSTATE_URB_ALLOC_DS); 2141 MATCH3D(3DSTATE_URB_ALLOC_GS); 2142 MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); 2143 MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); 2144 MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); 2145 MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); 2146 MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); 2147 MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); 2148 MATCH3D(3DSTATE_AMFS); 2149 MATCH3D(3DSTATE_DEPTH_BOUNDS); 2150 MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); 2151 MATCH3D(3DSTATE_CONSTANT_TS_POINTER); 2152 MATCH3D(3DSTATE_MESH_CONTROL); 2153 MATCH3D(3DSTATE_MESH_DISTRIB); 2154 MATCH3D(3DSTATE_TASK_REDISTRIB); 2155 MATCH3D(3DSTATE_MESH_SHADER); 2156 MATCH3D(3DSTATE_MESH_SHADER_DATA); 2157 MATCH3D(3DSTATE_TASK_CONTROL); 2158 MATCH3D(3DSTATE_TASK_SHADER); 2159 MATCH3D(3DSTATE_TASK_SHADER_DATA); 2160 MATCH3D(3DSTATE_URB_ALLOC_MESH); 2161 MATCH3D(3DSTATE_URB_ALLOC_TASK); 2162 MATCH3D(3DSTATE_CLIP_MESH); 2163 MATCH3D(3DSTATE_SBE_MESH); 2164 MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); 2165 MATCH3D(3DSTATE_COARSE_PIXEL); 2166 
MATCH3D(3DSTATE_MESH_SHADER_DATA_EXT); 2167 MATCH3D(3DSTATE_TASK_SHADER_DATA_EXT); 2168 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC_2); 2169 MATCH3D(3DSTATE_CC_STATE_POINTERS_2); 2170 MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS_2); 2171 MATCH3D(3DSTATE_BLEND_STATE_POINTERS_2); 2172 MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_2); 2173 2174 MATCH3D(3DSTATE_DRAWING_RECTANGLE); 2175 MATCH3D(3DSTATE_URB_MEMORY); 2176 MATCH3D(3DSTATE_CHROMA_KEY); 2177 MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); 2178 MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); 2179 MATCH3D(3DSTATE_LINE_STIPPLE); 2180 MATCH3D(3DSTATE_AA_LINE_PARAMETERS); 2181 MATCH3D(3DSTATE_MONOFILTER_SIZE); 2182 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); 2183 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); 2184 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); 2185 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); 2186 MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); 2187 MATCH3D(3DSTATE_SO_DECL_LIST); 2188 MATCH3D(3DSTATE_SO_BUFFER); 2189 MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); 2190 MATCH3D(3DSTATE_SAMPLE_PATTERN); 2191 MATCH3D(3DSTATE_3D_MODE); 2192 MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); 2193 MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); 2194 MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); 2195 MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTER_2); 2196 2197 default: 2198 drm_printf(p, "LRC[%#5tx] = [%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", 2199 dw - start, *dw, pipeline, opcode, subopcode, numdw); 2200 return numdw; 2201 } 2202 } 2203 2204 static int dump_gfx_state_command(struct drm_printer *p, 2205 struct xe_gt *gt, 2206 u32 *start, 2207 u32 *dw, 2208 int remaining_dw) 2209 { 2210 u32 numdw = instr_dw(*dw); 2211 u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw); 2212 2213 /* 2214 * Make sure we haven't mis-parsed a number of dwords that exceeds the 2215 * remaining size of the LRC. 
2216 */ 2217 if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) 2218 numdw = remaining_dw; 2219 2220 switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) { 2221 MATCH(STATE_WRITE_INLINE); 2222 2223 default: 2224 drm_printf(p, "LRC[%#5tx] = [%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", 2225 dw - start, *dw, opcode, numdw); 2226 return numdw; 2227 } 2228 } 2229 2230 void xe_lrc_dump_default(struct drm_printer *p, 2231 struct xe_gt *gt, 2232 enum xe_engine_class hwe_class) 2233 { 2234 u32 *dw, *start; 2235 int remaining_dw, num_dw; 2236 2237 if (!gt->default_lrc[hwe_class]) { 2238 drm_printf(p, "No default LRC for class %d\n", hwe_class); 2239 return; 2240 } 2241 2242 /* 2243 * Skip the beginning of the LRC since it contains the per-process 2244 * hardware status page. 2245 */ 2246 dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; 2247 start = dw; 2248 remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; 2249 2250 while (remaining_dw > 0) { 2251 if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { 2252 num_dw = dump_mi_command(p, gt, start, dw, remaining_dw); 2253 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { 2254 num_dw = dump_gfxpipe_command(p, gt, start, dw, remaining_dw); 2255 } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) { 2256 num_dw = dump_gfx_state_command(p, gt, start, dw, remaining_dw); 2257 } else { 2258 num_dw = min(instr_dw(*dw), remaining_dw); 2259 drm_printf(p, "LRC[%#5tx] = [%#10x] Unknown instruction of type %#x, likely %d dwords\n", 2260 dw - start, 2261 *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), 2262 num_dw); 2263 } 2264 2265 dw += num_dw; 2266 remaining_dw -= num_dw; 2267 } 2268 } 2269 2270 /* 2271 * Lookup the value of a register within the offset/value pairs of an 2272 * MI_LOAD_REGISTER_IMM instruction. 2273 * 2274 * Return -ENOENT if the register is not present in the MI_LRI instruction. 
2275 */ 2276 static int lookup_reg_in_mi_lri(u32 offset, u32 *value, 2277 const u32 *dword_pair, int num_regs) 2278 { 2279 for (int i = 0; i < num_regs; i++) { 2280 if (dword_pair[2 * i] == offset) { 2281 *value = dword_pair[2 * i + 1]; 2282 return 0; 2283 } 2284 } 2285 2286 return -ENOENT; 2287 } 2288 2289 /* 2290 * Lookup the value of a register in a specific engine type's default LRC. 2291 * 2292 * Return -EINVAL if the default LRC doesn't exist, or ENOENT if the register 2293 * cannot be found in the default LRC. 2294 */ 2295 int xe_lrc_lookup_default_reg_value(struct xe_gt *gt, 2296 enum xe_engine_class hwe_class, 2297 u32 offset, 2298 u32 *value) 2299 { 2300 u32 *dw; 2301 int remaining_dw, ret; 2302 2303 if (!gt->default_lrc[hwe_class]) 2304 return -EINVAL; 2305 2306 /* 2307 * Skip the beginning of the LRC since it contains the per-process 2308 * hardware status page. 2309 */ 2310 dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; 2311 remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; 2312 2313 while (remaining_dw > 0) { 2314 u32 num_dw = instr_dw(*dw); 2315 2316 if (num_dw > remaining_dw) 2317 num_dw = remaining_dw; 2318 2319 switch (*dw & XE_INSTR_CMD_TYPE) { 2320 case XE_INSTR_MI: 2321 switch (*dw & MI_OPCODE) { 2322 case MI_BATCH_BUFFER_END: 2323 /* End of LRC; register not found */ 2324 return -ENOENT; 2325 2326 case MI_NOOP: 2327 case MI_TOPOLOGY_FILTER: 2328 /* 2329 * MI_NOOP and MI_TOPOLOGY_FILTER don't have 2330 * a length field and are always 1-dword 2331 * instructions. 2332 */ 2333 remaining_dw--; 2334 dw++; 2335 break; 2336 2337 case MI_LOAD_REGISTER_IMM: 2338 ret = lookup_reg_in_mi_lri(offset, value, 2339 dw + 1, (num_dw - 1) / 2); 2340 if (ret == 0) 2341 return 0; 2342 2343 fallthrough; 2344 2345 default: 2346 /* 2347 * Jump to next instruction based on length 2348 * field. 
2349 */ 2350 remaining_dw -= num_dw; 2351 dw += num_dw; 2352 break; 2353 } 2354 break; 2355 2356 default: 2357 /* Jump to next instruction based on length field. */ 2358 remaining_dw -= num_dw; 2359 dw += num_dw; 2360 } 2361 } 2362 2363 return -ENOENT; 2364 } 2365 2366 struct instr_state { 2367 u32 instr; 2368 u16 num_dw; 2369 }; 2370 2371 static const struct instr_state xe_hpg_svg_state[] = { 2372 { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, 2373 { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, 2374 { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, 2375 { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, 2376 { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, 2377 { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, 2378 { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, 2379 { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, 2380 { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, 2381 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, 2382 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, 2383 { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, 2384 { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, 2385 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, 2386 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, 2387 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, 2388 { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, 2389 { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, 2390 { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, 2391 { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 }, 2392 { .instr = CMD_3DSTATE_SF, .num_dw = 4 }, 2393 { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, 2394 { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, 2395 { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, 2396 { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, 2397 { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, 2398 { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, 2399 { 
.instr = CMD_3DSTATE_HS, .num_dw = 9 }, 2400 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, 2401 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, 2402 { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, 2403 { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, 2404 { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, 2405 { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, 2406 { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, 2407 { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, 2408 { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, 2409 { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, 2410 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, 2411 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, 2412 { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, 2413 { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, 2414 { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, 2415 { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 }, 2416 { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 }, 2417 { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 }, 2418 { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 }, 2419 { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 }, 2420 { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 }, 2421 { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, 2422 }; 2423 2424 u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs) 2425 { 2426 struct xe_gt *gt = q->hwe->gt; 2427 struct xe_device *xe = gt_to_xe(gt); 2428 const struct instr_state *state_table = NULL; 2429 int state_table_size = 0; 2430 2431 /* 2432 * Wa_14019789679 2433 * 2434 * If the driver doesn't explicitly emit the SVG instructions while 2435 * setting up the default LRC, the context switch will write 0's 2436 * (noops) into the LRC memory rather than the expected instruction 2437 * headers. 
Application contexts start out as a copy of the default 2438 * LRC, and if they also do not emit specific settings for some SVG 2439 * state, then on context restore they'll unintentionally inherit 2440 * whatever state setting the previous context had programmed into the 2441 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will 2442 * prevent the hardware from resetting that state back to any specific 2443 * value). 2444 * 2445 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL 2446 * since that's a specific state setting that can easily cause GPU 2447 * hangs if unintentionally inherited. However to be safe we'll 2448 * continue to emit all of the SVG state since it's best not to leak 2449 * any of the state between contexts, even if that leakage is harmless. 2450 */ 2451 if (XE_GT_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { 2452 state_table = xe_hpg_svg_state; 2453 state_table_size = ARRAY_SIZE(xe_hpg_svg_state); 2454 } 2455 2456 if (!state_table) { 2457 xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", 2458 GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); 2459 return cs; 2460 } 2461 2462 for (int i = 0; i < state_table_size; i++) { 2463 u32 instr = state_table[i].instr; 2464 u16 num_dw = state_table[i].num_dw; 2465 bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW); 2466 2467 xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE); 2468 xe_gt_assert(gt, num_dw != 0); 2469 xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); 2470 2471 /* 2472 * Xe2's SVG context is the same as the one on DG2 / MTL 2473 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has 2474 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined). 2475 * Just make the replacement here rather than defining a 2476 * whole separate table for the single trivial change. 
2477 */ 2478 if (GRAPHICS_VER(xe) >= 20 && 2479 instr == CMD_3DSTATE_DRAWING_RECTANGLE) 2480 instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; 2481 2482 *cs = instr; 2483 if (!is_single_dw) 2484 *cs |= (num_dw - 2); 2485 2486 cs += num_dw; 2487 } 2488 2489 return cs; 2490 } 2491 2492 struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) 2493 { 2494 struct xe_lrc_snapshot *snapshot = kmalloc_obj(*snapshot, GFP_NOWAIT); 2495 2496 if (!snapshot) 2497 return NULL; 2498 2499 snapshot->context_desc = xe_lrc_ggtt_addr(lrc); 2500 snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc); 2501 snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); 2502 snapshot->head = xe_lrc_ring_head(lrc); 2503 snapshot->tail.internal = lrc->ring.tail; 2504 snapshot->tail.memory = xe_lrc_ring_tail(lrc); 2505 snapshot->start = xe_lrc_ring_start(lrc); 2506 snapshot->start_seqno = xe_lrc_start_seqno(lrc); 2507 snapshot->seqno = xe_lrc_seqno(lrc); 2508 snapshot->lrc_bo = xe_bo_get(lrc->bo); 2509 snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); 2510 snapshot->lrc_size = lrc->size; 2511 snapshot->replay_offset = 0; 2512 snapshot->replay_size = lrc->replay_size; 2513 snapshot->lrc_snapshot = NULL; 2514 snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); 2515 snapshot->ctx_timestamp_ms = 2516 xe_gt_clock_interval_to_ms(lrc->gt, xe_lrc_ctx_timestamp(lrc)); 2517 if (xe_lrc_is_multi_queue(lrc)) { 2518 snapshot->queue_timestamp = xe_lrc_queue_timestamp(lrc); 2519 snapshot->queue_timestamp_ms = 2520 xe_gt_clock_interval_to_ms(lrc->gt, snapshot->queue_timestamp); 2521 } else { 2522 snapshot->queue_timestamp = 0; 2523 snapshot->queue_timestamp_ms = 0; 2524 } 2525 snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); 2526 return snapshot; 2527 } 2528 2529 void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) 2530 { 2531 struct xe_bo *bo; 2532 struct iosys_map src; 2533 2534 if (!snapshot) 2535 return; 2536 2537 bo = snapshot->lrc_bo; 2538 snapshot->lrc_bo = 
NULL; 2539 2540 snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); 2541 if (!snapshot->lrc_snapshot) 2542 goto put_bo; 2543 2544 xe_bo_lock(bo, false); 2545 if (!ttm_bo_vmap(&bo->ttm, &src)) { 2546 xe_map_memcpy_from(xe_bo_device(bo), 2547 snapshot->lrc_snapshot, &src, snapshot->lrc_offset, 2548 snapshot->lrc_size); 2549 ttm_bo_vunmap(&bo->ttm, &src); 2550 } else { 2551 kvfree(snapshot->lrc_snapshot); 2552 snapshot->lrc_snapshot = NULL; 2553 } 2554 xe_bo_unlock(bo); 2555 put_bo: 2556 xe_bo_put(bo); 2557 } 2558 2559 void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) 2560 { 2561 unsigned long i; 2562 2563 if (!snapshot) 2564 return; 2565 2566 drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); 2567 drm_printf(p, "\tHW Ring address: 0x%08x\n", 2568 snapshot->ring_addr); 2569 drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", 2570 snapshot->indirect_context_desc); 2571 drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); 2572 drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", 2573 snapshot->tail.internal, snapshot->tail.memory); 2574 drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start); 2575 drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); 2576 drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); 2577 drm_printf(p, "\tTimestamp: 0x%016llx\n", snapshot->ctx_timestamp); 2578 drm_printf(p, "\tTimestamp ms: %llu\n", snapshot->ctx_timestamp_ms); 2579 drm_printf(p, "\tQueue Timestamp: 0x%016llx\n", snapshot->queue_timestamp); 2580 drm_printf(p, "\tQueue Timestamp ms: %llu\n", snapshot->queue_timestamp_ms); 2581 drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp); 2582 2583 if (!snapshot->lrc_snapshot) 2584 return; 2585 2586 drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE); 2587 drm_puts(p, "\t[HWSP].data: "); 2588 for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) { 2589 u32 *val = snapshot->lrc_snapshot + i; 2590 char 
dumped[ASCII85_BUFSZ]; 2591 2592 drm_puts(p, ascii85_encode(*val, dumped)); 2593 } 2594 2595 drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE); 2596 drm_printf(p, "\n\t[HWCTX].replay_offset: 0x%lx\n", snapshot->replay_offset); 2597 drm_printf(p, "\n\t[HWCTX].replay_length: 0x%lx\n", snapshot->replay_size); 2598 2599 drm_puts(p, "\t[HWCTX].data: "); 2600 for (; i < snapshot->lrc_size; i += sizeof(u32)) { 2601 u32 *val = snapshot->lrc_snapshot + i; 2602 char dumped[ASCII85_BUFSZ]; 2603 2604 drm_puts(p, ascii85_encode(*val, dumped)); 2605 } 2606 drm_puts(p, "\n"); 2607 } 2608 2609 void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) 2610 { 2611 if (!snapshot) 2612 return; 2613 2614 kvfree(snapshot->lrc_snapshot); 2615 if (snapshot->lrc_bo) 2616 xe_bo_put(snapshot->lrc_bo); 2617 2618 kfree(snapshot); 2619 } 2620 2621 static struct xe_hw_engine *engine_id_to_hwe(struct xe_gt *gt, u32 engine_id) 2622 { 2623 u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id); 2624 u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id); 2625 struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, class, instance, false); 2626 2627 if (xe_gt_WARN_ONCE(gt, !hwe || xe_hw_engine_is_reserved(hwe), 2628 "Unexpected engine class:instance %d:%d for utilization\n", 2629 class, instance)) 2630 return NULL; 2631 2632 return hwe; 2633 } 2634 2635 static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) 2636 { 2637 struct xe_hw_engine *hwe; 2638 u64 val; 2639 2640 hwe = engine_id_to_hwe(lrc->gt, engine_id); 2641 if (!hwe) 2642 return -1; 2643 2644 if (lrc_to_xe(lrc)->info.has_64bit_timestamp) 2645 val = xe_mmio_read64_2x32(&hwe->gt->mmio, 2646 RING_CTX_TIMESTAMP(hwe->mmio_base)); 2647 else 2648 val = xe_mmio_read32(&hwe->gt->mmio, 2649 RING_CTX_TIMESTAMP(hwe->mmio_base)); 2650 2651 *reg_ctx_ts = val; 2652 2653 return 0; 2654 } 2655 2656 static u64 get_queue_timestamp(struct xe_hw_engine *hwe) 2657 { 2658 return 
xe_mmio_read64_2x32(&hwe->gt->mmio, 2659 RING_QUEUE_TIMESTAMP(hwe->mmio_base)); 2660 } 2661 2662 static u32 get_multi_queue_active_queue_id(struct xe_hw_engine *hwe) 2663 { 2664 u32 val = xe_mmio_read32(&hwe->gt->mmio, 2665 RING_CSMQDEBUG(hwe->mmio_base)); 2666 2667 return REG_FIELD_GET(CURRENT_ACTIVE_QUEUE_ID_MASK, val); 2668 } 2669 2670 static bool context_active(struct xe_lrc *lrc) 2671 { 2672 return xe_lrc_ctx_timestamp(lrc) == CONTEXT_ACTIVE; 2673 } 2674 2675 static u64 xe_lrc_multi_queue_timestamp(struct xe_lrc *lrc) 2676 { 2677 struct xe_device *xe = lrc_to_xe(lrc); 2678 struct xe_lrc *primary_lrc = lrc->multi_queue.primary_lrc; 2679 struct xe_hw_engine *hwe; 2680 u64 reg_queue_ts = lrc->queue_timestamp; 2681 2682 if (IS_SRIOV_VF(xe)) 2683 return xe_lrc_queue_timestamp(lrc); 2684 2685 xe_assert(xe, primary_lrc); 2686 2687 /* WA BB populates CONTEXT_ACTIVE cookie for primary context only */ 2688 if (!context_active(primary_lrc)) 2689 return xe_lrc_queue_timestamp(lrc); 2690 2691 /* WA BB populates engine id in PPHWSP of primary context only */ 2692 hwe = engine_id_to_hwe(primary_lrc->gt, xe_lrc_engine_id(primary_lrc)); 2693 if (!hwe) 2694 return xe_lrc_queue_timestamp(lrc); 2695 2696 if (get_multi_queue_active_queue_id(hwe) != lrc->multi_queue.pos) 2697 return xe_lrc_queue_timestamp(lrc); 2698 2699 /* queue is active, so store the queue timestamp register */ 2700 reg_queue_ts = get_queue_timestamp(hwe); 2701 2702 /* double check queue and primary queue are both still active */ 2703 if (get_multi_queue_active_queue_id(hwe) != lrc->multi_queue.pos || 2704 !context_active(primary_lrc)) 2705 return xe_lrc_queue_timestamp(lrc); 2706 2707 return reg_queue_ts; 2708 } 2709 2710 static u64 xe_lrc_update_multi_queue_timestamp(struct xe_lrc *lrc, u64 *old_ts) 2711 { 2712 *old_ts = lrc->queue_timestamp; 2713 lrc->queue_timestamp = xe_lrc_multi_queue_timestamp(lrc); 2714 2715 trace_xe_lrc_update_queue_timestamp(lrc, *old_ts); 2716 2717 return lrc->queue_timestamp; 2718 } 
2719 2720 static u64 xe_lrc_context_timestamp(struct xe_lrc *lrc) 2721 { 2722 u64 reg_ts, new_ts = lrc->ctx_timestamp; 2723 2724 /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */ 2725 if (IS_SRIOV_VF(lrc_to_xe(lrc))) 2726 return xe_lrc_ctx_timestamp(lrc); 2727 2728 if (context_active(lrc) && 2729 !get_ctx_timestamp(lrc, xe_lrc_engine_id(lrc), ®_ts)) 2730 new_ts = reg_ts; 2731 2732 /* 2733 * If context swicthed out while we were here, just return the latest 2734 * LRC CTX TIMESTAMP value. 2735 */ 2736 if (!context_active(lrc)) 2737 return xe_lrc_ctx_timestamp(lrc); 2738 2739 return new_ts; 2740 } 2741 2742 static u64 xe_lrc_update_context_timestamp(struct xe_lrc *lrc, u64 *old_ts) 2743 { 2744 *old_ts = lrc->ctx_timestamp; 2745 lrc->ctx_timestamp = xe_lrc_context_timestamp(lrc); 2746 2747 trace_xe_lrc_update_timestamp(lrc, *old_ts); 2748 2749 return lrc->ctx_timestamp; 2750 } 2751 2752 /** 2753 * xe_lrc_timestamp() - Current lrc timestamp 2754 * @lrc: Pointer to the lrc. 2755 * 2756 * Return latest lrc timestamp. With support for active contexts/queues, the 2757 * calculation may be slightly racy, so follow a read-again logic to ensure that 2758 * the context/queue is still active before returning the right timestamp. 2759 * 2760 * Returns: New lrc timestamp value 2761 */ 2762 u64 xe_lrc_timestamp(struct xe_lrc *lrc) 2763 { 2764 if (xe_lrc_is_multi_queue(lrc)) 2765 return xe_lrc_multi_queue_timestamp(lrc); 2766 else 2767 return xe_lrc_context_timestamp(lrc); 2768 } 2769 2770 /** 2771 * xe_lrc_update_timestamp() - Update lrc timestamp 2772 * @lrc: Pointer to the lrc. 2773 * @old_ts: Old timestamp value 2774 * 2775 * Populate @old_ts with current saved lrc timestamp, read new lrc timestamp and 2776 * update saved value. 
2777 * 2778 * Returns: New lrc timestamp value 2779 */ 2780 u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts) 2781 { 2782 if (xe_lrc_is_multi_queue(lrc)) 2783 return xe_lrc_update_multi_queue_timestamp(lrc, old_ts); 2784 else 2785 return xe_lrc_update_context_timestamp(lrc, old_ts); 2786 } 2787 2788 /** 2789 * xe_lrc_ring_is_idle() - LRC is idle 2790 * @lrc: Pointer to the lrc. 2791 * 2792 * Compare LRC ring head and tail to determine if idle. 2793 * 2794 * Return: True is ring is idle, False otherwise 2795 */ 2796 bool xe_lrc_ring_is_idle(struct xe_lrc *lrc) 2797 { 2798 return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc); 2799 } 2800