1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2018 Intel Corporation 4 */ 5 6 #include <linux/prime_numbers.h> 7 8 #include "gem/i915_gem_internal.h" 9 10 #include "i915_selftest.h" 11 #include "intel_engine_heartbeat.h" 12 #include "intel_engine_pm.h" 13 #include "intel_reset.h" 14 #include "intel_ring.h" 15 #include "selftest_engine_heartbeat.h" 16 #include "selftests/i915_random.h" 17 #include "selftests/igt_flush_test.h" 18 #include "selftests/igt_live_test.h" 19 #include "selftests/igt_spinner.h" 20 #include "selftests/lib_sw_fence.h" 21 #include "shmem_utils.h" 22 23 #include "gem/selftests/igt_gem_utils.h" 24 #include "gem/selftests/mock_context.h" 25 26 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) 27 #define NUM_GPR 16 28 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */ 29 30 #define LRI_HEADER MI_INSTR(0x22, 0) 31 #define LRI_LENGTH_MASK GENMASK(7, 0) 32 33 static struct i915_vma *create_scratch(struct intel_gt *gt) 34 { 35 return __vm_create_scratch_for_read_pinned(>->ggtt->vm, PAGE_SIZE); 36 } 37 38 static bool is_active(struct i915_request *rq) 39 { 40 if (i915_request_is_active(rq)) 41 return true; 42 43 if (i915_request_on_hold(rq)) 44 return true; 45 46 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq)) 47 return true; 48 49 return false; 50 } 51 52 static int wait_for_submit(struct intel_engine_cs *engine, 53 struct i915_request *rq, 54 unsigned long timeout) 55 { 56 /* Ignore our own attempts to suppress excess tasklets */ 57 tasklet_hi_schedule(&engine->sched_engine->tasklet); 58 59 timeout += jiffies; 60 do { 61 bool done = time_after(jiffies, timeout); 62 63 if (i915_request_completed(rq)) /* that was quick! 
*/ 64 return 0; 65 66 /* Wait until the HW has acknowleged the submission (or err) */ 67 intel_engine_flush_submission(engine); 68 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq)) 69 return 0; 70 71 if (done) 72 return -ETIME; 73 74 cond_resched(); 75 } while (1); 76 } 77 78 static int emit_semaphore_signal(struct intel_context *ce, void *slot) 79 { 80 const u32 offset = 81 i915_ggtt_offset(ce->engine->status_page.vma) + 82 offset_in_page(slot); 83 struct i915_request *rq; 84 u32 *cs; 85 86 rq = intel_context_create_request(ce); 87 if (IS_ERR(rq)) 88 return PTR_ERR(rq); 89 90 cs = intel_ring_begin(rq, 4); 91 if (IS_ERR(cs)) { 92 i915_request_add(rq); 93 return PTR_ERR(cs); 94 } 95 96 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 97 *cs++ = offset; 98 *cs++ = 0; 99 *cs++ = 1; 100 101 intel_ring_advance(rq, cs); 102 103 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 104 i915_request_add(rq); 105 return 0; 106 } 107 108 static int context_flush(struct intel_context *ce, long timeout) 109 { 110 struct i915_request *rq; 111 struct dma_fence *fence; 112 int err = 0; 113 114 rq = intel_engine_create_kernel_request(ce->engine); 115 if (IS_ERR(rq)) 116 return PTR_ERR(rq); 117 118 fence = i915_active_fence_get(&ce->timeline->last_request); 119 if (fence) { 120 i915_request_await_dma_fence(rq, fence); 121 dma_fence_put(fence); 122 } 123 124 rq = i915_request_get(rq); 125 i915_request_add(rq); 126 if (i915_request_wait(rq, 0, timeout) < 0) 127 err = -ETIME; 128 i915_request_put(rq); 129 130 rmb(); /* We know the request is written, make sure all state is too! 
*/ 131 return err; 132 } 133 134 static int get_lri_mask(struct intel_engine_cs *engine, u32 lri) 135 { 136 if ((lri & MI_LRI_LRM_CS_MMIO) == 0) 137 return ~0u; 138 139 if (GRAPHICS_VER(engine->i915) < 12) 140 return 0xfff; 141 142 switch (engine->class) { 143 default: 144 case RENDER_CLASS: 145 case COMPUTE_CLASS: 146 return 0x07ff; 147 case COPY_ENGINE_CLASS: 148 return 0x0fff; 149 case VIDEO_DECODE_CLASS: 150 case VIDEO_ENHANCEMENT_CLASS: 151 return 0x3fff; 152 } 153 } 154 155 static int live_lrc_layout(void *arg) 156 { 157 struct intel_gt *gt = arg; 158 struct intel_engine_cs *engine; 159 enum intel_engine_id id; 160 u32 *lrc; 161 int err; 162 163 /* 164 * Check the registers offsets we use to create the initial reg state 165 * match the layout saved by HW. 166 */ 167 168 lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */ 169 if (!lrc) 170 return -ENOMEM; 171 GEM_BUG_ON(offset_in_page(lrc)); 172 173 err = 0; 174 for_each_engine(engine, gt, id) { 175 u32 *hw; 176 int dw; 177 178 if (!engine->default_state) 179 continue; 180 181 hw = shmem_pin_map(engine->default_state); 182 if (!hw) { 183 err = -ENOMEM; 184 break; 185 } 186 hw += LRC_STATE_OFFSET / sizeof(*hw); 187 188 __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE), 189 engine->kernel_context, engine, true); 190 191 dw = 0; 192 do { 193 u32 lri = READ_ONCE(hw[dw]); 194 u32 lri_mask; 195 196 if (lri == 0) { 197 dw++; 198 continue; 199 } 200 201 if (lrc[dw] == 0) { 202 pr_debug("%s: skipped instruction %x at dword %d\n", 203 engine->name, lri, dw); 204 dw++; 205 continue; 206 } 207 208 if ((lri & GENMASK(31, 23)) != LRI_HEADER) { 209 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 210 engine->name, dw, lri); 211 err = -EINVAL; 212 break; 213 } 214 215 if (lrc[dw] != lri) { 216 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 217 engine->name, dw, lri, lrc[dw]); 218 err = -EINVAL; 219 break; 220 } 221 222 /* 223 * When bit 19 of MI_LOAD_REGISTER_IMM 
instruction 224 * opcode is set on Gen12+ devices, HW does not 225 * care about certain register address offsets, and 226 * instead check the following for valid address 227 * ranges on specific engines: 228 * RCS && CCS: BITS(0 - 10) 229 * BCS: BITS(0 - 11) 230 * VECS && VCS: BITS(0 - 13) 231 */ 232 lri_mask = get_lri_mask(engine, lri); 233 234 lri &= 0x7f; 235 lri++; 236 dw++; 237 238 while (lri) { 239 u32 offset = READ_ONCE(hw[dw]); 240 241 if ((offset ^ lrc[dw]) & lri_mask) { 242 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 243 engine->name, dw, offset, lrc[dw]); 244 err = -EINVAL; 245 break; 246 } 247 248 /* 249 * Skip over the actual register value as we 250 * expect that to differ. 251 */ 252 dw += 2; 253 lri -= 2; 254 } 255 } while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 256 257 if (err) { 258 pr_info("%s: HW register image:\n", engine->name); 259 igt_hexdump(hw, PAGE_SIZE); 260 261 pr_info("%s: SW register image:\n", engine->name); 262 igt_hexdump(lrc, PAGE_SIZE); 263 } 264 265 shmem_unpin_map(engine->default_state, hw); 266 if (err) 267 break; 268 } 269 270 free_page((unsigned long)lrc); 271 return err; 272 } 273 274 static int find_offset(const u32 *lri, u32 offset) 275 { 276 int i; 277 278 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 279 if (lri[i] == offset) 280 return i; 281 282 return -1; 283 } 284 285 static int live_lrc_fixed(void *arg) 286 { 287 struct intel_gt *gt = arg; 288 struct intel_engine_cs *engine; 289 enum intel_engine_id id; 290 int err = 0; 291 292 /* 293 * Check the assumed register offsets match the actual locations in 294 * the context image. 
295 */ 296 297 for_each_engine(engine, gt, id) { 298 const struct { 299 u32 reg; 300 u32 offset; 301 const char *name; 302 } tbl[] = { 303 { 304 i915_mmio_reg_offset(RING_START(engine->mmio_base)), 305 CTX_RING_START - 1, 306 "RING_START" 307 }, 308 { 309 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), 310 CTX_RING_CTL - 1, 311 "RING_CTL" 312 }, 313 { 314 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), 315 CTX_RING_HEAD - 1, 316 "RING_HEAD" 317 }, 318 { 319 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), 320 CTX_RING_TAIL - 1, 321 "RING_TAIL" 322 }, 323 { 324 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), 325 lrc_ring_mi_mode(engine), 326 "RING_MI_MODE" 327 }, 328 { 329 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), 330 CTX_BB_STATE - 1, 331 "BB_STATE" 332 }, 333 { 334 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)), 335 lrc_ring_wa_bb_per_ctx(engine), 336 "RING_BB_PER_CTX_PTR" 337 }, 338 { 339 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)), 340 lrc_ring_indirect_ptr(engine), 341 "RING_INDIRECT_CTX_PTR" 342 }, 343 { 344 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)), 345 lrc_ring_indirect_offset(engine), 346 "RING_INDIRECT_CTX_OFFSET" 347 }, 348 { 349 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), 350 CTX_TIMESTAMP - 1, 351 "RING_CTX_TIMESTAMP" 352 }, 353 { 354 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)), 355 lrc_ring_gpr0(engine), 356 "RING_CS_GPR0" 357 }, 358 { 359 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)), 360 lrc_ring_cmd_buf_cctl(engine), 361 "RING_CMD_BUF_CCTL" 362 }, 363 { 364 i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)), 365 lrc_ring_bb_offset(engine), 366 "RING_BB_OFFSET" 367 }, 368 { }, 369 }, *t; 370 u32 *hw; 371 372 if (!engine->default_state) 373 continue; 374 375 hw = shmem_pin_map(engine->default_state); 376 if (!hw) { 377 err = -ENOMEM; 378 break; 379 } 380 hw += LRC_STATE_OFFSET / sizeof(*hw); 381 382 for (t = tbl; 
t->name; t++) { 383 int dw = find_offset(hw, t->reg); 384 385 if (dw != t->offset) { 386 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", 387 engine->name, 388 t->name, 389 t->reg, 390 dw, 391 t->offset); 392 err = -EINVAL; 393 } 394 } 395 396 shmem_unpin_map(engine->default_state, hw); 397 } 398 399 return err; 400 } 401 402 static int __live_lrc_state(struct intel_engine_cs *engine, 403 struct i915_vma *scratch) 404 { 405 struct intel_context *ce; 406 struct i915_request *rq; 407 struct i915_gem_ww_ctx ww; 408 enum { 409 RING_START_IDX = 0, 410 RING_TAIL_IDX, 411 MAX_IDX 412 }; 413 u32 expected[MAX_IDX]; 414 u32 *cs; 415 int err; 416 int n; 417 418 ce = intel_context_create(engine); 419 if (IS_ERR(ce)) 420 return PTR_ERR(ce); 421 422 i915_gem_ww_ctx_init(&ww, false); 423 retry: 424 err = i915_gem_object_lock(scratch->obj, &ww); 425 if (!err) 426 err = intel_context_pin_ww(ce, &ww); 427 if (err) 428 goto err_put; 429 430 rq = i915_request_create(ce); 431 if (IS_ERR(rq)) { 432 err = PTR_ERR(rq); 433 goto err_unpin; 434 } 435 436 cs = intel_ring_begin(rq, 4 * MAX_IDX); 437 if (IS_ERR(cs)) { 438 err = PTR_ERR(cs); 439 i915_request_add(rq); 440 goto err_unpin; 441 } 442 443 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 444 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); 445 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); 446 *cs++ = 0; 447 448 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); 449 450 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 451 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); 452 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); 453 *cs++ = 0; 454 455 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 456 457 i915_request_get(rq); 458 i915_request_add(rq); 459 if (err) 460 goto err_rq; 461 462 intel_engine_flush_submission(engine); 463 expected[RING_TAIL_IDX] = ce->ring->tail; 464 465 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 466 err = 
-ETIME; 467 goto err_rq; 468 } 469 470 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 471 if (IS_ERR(cs)) { 472 err = PTR_ERR(cs); 473 goto err_rq; 474 } 475 476 for (n = 0; n < MAX_IDX; n++) { 477 if (cs[n] != expected[n]) { 478 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", 479 engine->name, n, cs[n], expected[n]); 480 err = -EINVAL; 481 break; 482 } 483 } 484 485 i915_gem_object_unpin_map(scratch->obj); 486 487 err_rq: 488 i915_request_put(rq); 489 err_unpin: 490 intel_context_unpin(ce); 491 err_put: 492 if (err == -EDEADLK) { 493 err = i915_gem_ww_ctx_backoff(&ww); 494 if (!err) 495 goto retry; 496 } 497 i915_gem_ww_ctx_fini(&ww); 498 intel_context_put(ce); 499 return err; 500 } 501 502 static int live_lrc_state(void *arg) 503 { 504 struct intel_gt *gt = arg; 505 struct intel_engine_cs *engine; 506 struct i915_vma *scratch; 507 enum intel_engine_id id; 508 int err = 0; 509 510 /* 511 * Check the live register state matches what we expect for this 512 * intel_context. 
513 */ 514 515 scratch = create_scratch(gt); 516 if (IS_ERR(scratch)) 517 return PTR_ERR(scratch); 518 519 for_each_engine(engine, gt, id) { 520 err = __live_lrc_state(engine, scratch); 521 if (err) 522 break; 523 } 524 525 if (igt_flush_test(gt->i915)) 526 err = -EIO; 527 528 i915_vma_unpin_and_release(&scratch, 0); 529 return err; 530 } 531 532 static int gpr_make_dirty(struct intel_context *ce) 533 { 534 struct i915_request *rq; 535 u32 *cs; 536 int n; 537 538 rq = intel_context_create_request(ce); 539 if (IS_ERR(rq)) 540 return PTR_ERR(rq); 541 542 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); 543 if (IS_ERR(cs)) { 544 i915_request_add(rq); 545 return PTR_ERR(cs); 546 } 547 548 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); 549 for (n = 0; n < NUM_GPR_DW; n++) { 550 *cs++ = CS_GPR(ce->engine, n); 551 *cs++ = STACK_MAGIC; 552 } 553 *cs++ = MI_NOOP; 554 555 intel_ring_advance(rq, cs); 556 557 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 558 i915_request_add(rq); 559 560 return 0; 561 } 562 563 static struct i915_request * 564 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) 565 { 566 const u32 offset = 567 i915_ggtt_offset(ce->engine->status_page.vma) + 568 offset_in_page(slot); 569 struct i915_request *rq; 570 u32 *cs; 571 int err; 572 int n; 573 574 rq = intel_context_create_request(ce); 575 if (IS_ERR(rq)) 576 return rq; 577 578 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); 579 if (IS_ERR(cs)) { 580 i915_request_add(rq); 581 return ERR_CAST(cs); 582 } 583 584 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 585 *cs++ = MI_NOOP; 586 587 *cs++ = MI_SEMAPHORE_WAIT | 588 MI_SEMAPHORE_GLOBAL_GTT | 589 MI_SEMAPHORE_POLL | 590 MI_SEMAPHORE_SAD_NEQ_SDD; 591 *cs++ = 0; 592 *cs++ = offset; 593 *cs++ = 0; 594 595 for (n = 0; n < NUM_GPR_DW; n++) { 596 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 597 *cs++ = CS_GPR(ce->engine, n); 598 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 599 *cs++ = 0; 600 } 601 602 err = 
igt_vma_move_to_active_unlocked(scratch, rq, EXEC_OBJECT_WRITE); 603 604 i915_request_get(rq); 605 i915_request_add(rq); 606 if (err) { 607 i915_request_put(rq); 608 rq = ERR_PTR(err); 609 } 610 611 return rq; 612 } 613 614 static int __live_lrc_gpr(struct intel_engine_cs *engine, 615 struct i915_vma *scratch, 616 bool preempt) 617 { 618 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); 619 struct intel_context *ce; 620 struct i915_request *rq; 621 u32 *cs; 622 int err; 623 int n; 624 625 if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS) 626 return 0; /* GPR only on rcs0 for gen8 */ 627 628 err = gpr_make_dirty(engine->kernel_context); 629 if (err) 630 return err; 631 632 ce = intel_context_create(engine); 633 if (IS_ERR(ce)) 634 return PTR_ERR(ce); 635 636 rq = __gpr_read(ce, scratch, slot); 637 if (IS_ERR(rq)) { 638 err = PTR_ERR(rq); 639 goto err_put; 640 } 641 642 err = wait_for_submit(engine, rq, HZ / 2); 643 if (err) 644 goto err_rq; 645 646 if (preempt) { 647 err = gpr_make_dirty(engine->kernel_context); 648 if (err) 649 goto err_rq; 650 651 err = emit_semaphore_signal(engine->kernel_context, slot); 652 if (err) 653 goto err_rq; 654 655 err = wait_for_submit(engine, rq, HZ / 2); 656 if (err) 657 goto err_rq; 658 } else { 659 slot[0] = 1; 660 wmb(); 661 } 662 663 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 664 err = -ETIME; 665 goto err_rq; 666 } 667 668 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB); 669 if (IS_ERR(cs)) { 670 err = PTR_ERR(cs); 671 goto err_rq; 672 } 673 674 for (n = 0; n < NUM_GPR_DW; n++) { 675 if (cs[n]) { 676 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", 677 engine->name, 678 n / 2, n & 1 ? 
"udw" : "ldw", 679 cs[n]); 680 err = -EINVAL; 681 break; 682 } 683 } 684 685 i915_gem_object_unpin_map(scratch->obj); 686 687 err_rq: 688 memset32(&slot[0], -1, 4); 689 wmb(); 690 i915_request_put(rq); 691 err_put: 692 intel_context_put(ce); 693 return err; 694 } 695 696 static int live_lrc_gpr(void *arg) 697 { 698 struct intel_gt *gt = arg; 699 struct intel_engine_cs *engine; 700 struct i915_vma *scratch; 701 enum intel_engine_id id; 702 int err = 0; 703 704 /* 705 * Check that GPR registers are cleared in new contexts as we need 706 * to avoid leaking any information from previous contexts. 707 */ 708 709 scratch = create_scratch(gt); 710 if (IS_ERR(scratch)) 711 return PTR_ERR(scratch); 712 713 for_each_engine(engine, gt, id) { 714 st_engine_heartbeat_disable(engine); 715 716 err = __live_lrc_gpr(engine, scratch, false); 717 if (err) 718 goto err; 719 720 err = __live_lrc_gpr(engine, scratch, true); 721 if (err) 722 goto err; 723 724 err: 725 st_engine_heartbeat_enable(engine); 726 if (igt_flush_test(gt->i915)) 727 err = -EIO; 728 if (err) 729 break; 730 } 731 732 i915_vma_unpin_and_release(&scratch, 0); 733 return err; 734 } 735 736 static struct i915_request * 737 create_timestamp(struct intel_context *ce, void *slot, int idx) 738 { 739 const u32 offset = 740 i915_ggtt_offset(ce->engine->status_page.vma) + 741 offset_in_page(slot); 742 struct i915_request *rq; 743 u32 *cs; 744 int err; 745 746 rq = intel_context_create_request(ce); 747 if (IS_ERR(rq)) 748 return rq; 749 750 cs = intel_ring_begin(rq, 10); 751 if (IS_ERR(cs)) { 752 err = PTR_ERR(cs); 753 goto err; 754 } 755 756 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 757 *cs++ = MI_NOOP; 758 759 *cs++ = MI_SEMAPHORE_WAIT | 760 MI_SEMAPHORE_GLOBAL_GTT | 761 MI_SEMAPHORE_POLL | 762 MI_SEMAPHORE_SAD_NEQ_SDD; 763 *cs++ = 0; 764 *cs++ = offset; 765 *cs++ = 0; 766 767 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 768 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base)); 769 *cs++ = offset + idx * 
sizeof(u32); 770 *cs++ = 0; 771 772 intel_ring_advance(rq, cs); 773 774 err = 0; 775 err: 776 i915_request_get(rq); 777 i915_request_add(rq); 778 if (err) { 779 i915_request_put(rq); 780 return ERR_PTR(err); 781 } 782 783 return rq; 784 } 785 786 struct lrc_timestamp { 787 struct intel_engine_cs *engine; 788 struct intel_context *ce[2]; 789 u32 poison; 790 }; 791 792 static bool timestamp_advanced(u32 start, u32 end) 793 { 794 return (s32)(end - start) > 0; 795 } 796 797 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) 798 { 799 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4); 800 struct i915_request *rq; 801 u32 timestamp; 802 int err = 0; 803 804 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison; 805 rq = create_timestamp(arg->ce[0], slot, 1); 806 if (IS_ERR(rq)) 807 return PTR_ERR(rq); 808 809 err = wait_for_submit(rq->engine, rq, HZ / 2); 810 if (err) 811 goto err; 812 813 if (preempt) { 814 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef; 815 err = emit_semaphore_signal(arg->ce[1], slot); 816 if (err) 817 goto err; 818 } else { 819 slot[0] = 1; 820 wmb(); 821 } 822 823 /* And wait for switch to kernel (to save our context to memory) */ 824 err = context_flush(arg->ce[0], HZ / 2); 825 if (err) 826 goto err; 827 828 if (!timestamp_advanced(arg->poison, slot[1])) { 829 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", 830 arg->engine->name, preempt ? "preempt" : "simple", 831 arg->poison, slot[1]); 832 err = -EINVAL; 833 } 834 835 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]); 836 if (!timestamp_advanced(slot[1], timestamp)) { 837 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n", 838 arg->engine->name, preempt ? 
"preempt" : "simple", 839 slot[1], timestamp); 840 err = -EINVAL; 841 } 842 843 err: 844 memset32(slot, -1, 4); 845 i915_request_put(rq); 846 return err; 847 } 848 849 static int live_lrc_timestamp(void *arg) 850 { 851 struct lrc_timestamp data = {}; 852 struct intel_gt *gt = arg; 853 enum intel_engine_id id; 854 const u32 poison[] = { 855 0, 856 S32_MAX, 857 (u32)S32_MAX + 1, 858 U32_MAX, 859 }; 860 861 /* 862 * We want to verify that the timestamp is saved and restore across 863 * context switches and is monotonic. 864 * 865 * So we do this with a little bit of LRC poisoning to check various 866 * boundary conditions, and see what happens if we preempt the context 867 * with a second request (carrying more poison into the timestamp). 868 */ 869 870 for_each_engine(data.engine, gt, id) { 871 int i, err = 0; 872 873 st_engine_heartbeat_disable(data.engine); 874 875 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 876 struct intel_context *tmp; 877 878 tmp = intel_context_create(data.engine); 879 if (IS_ERR(tmp)) { 880 err = PTR_ERR(tmp); 881 goto err; 882 } 883 884 err = intel_context_pin(tmp); 885 if (err) { 886 intel_context_put(tmp); 887 goto err; 888 } 889 890 data.ce[i] = tmp; 891 } 892 893 for (i = 0; i < ARRAY_SIZE(poison); i++) { 894 data.poison = poison[i]; 895 896 err = __lrc_timestamp(&data, false); 897 if (err) 898 break; 899 900 err = __lrc_timestamp(&data, true); 901 if (err) 902 break; 903 } 904 905 err: 906 st_engine_heartbeat_enable(data.engine); 907 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 908 if (!data.ce[i]) 909 break; 910 911 intel_context_unpin(data.ce[i]); 912 intel_context_put(data.ce[i]); 913 } 914 915 if (igt_flush_test(gt->i915)) 916 err = -EIO; 917 if (err) 918 return err; 919 } 920 921 return 0; 922 } 923 924 static struct i915_vma * 925 create_user_vma(struct i915_address_space *vm, unsigned long size) 926 { 927 struct drm_i915_gem_object *obj; 928 struct i915_vma *vma; 929 int err; 930 931 obj = i915_gem_object_create_internal(vm->i915, 
size); 932 if (IS_ERR(obj)) 933 return ERR_CAST(obj); 934 935 vma = i915_vma_instance(obj, vm, NULL); 936 if (IS_ERR(vma)) { 937 i915_gem_object_put(obj); 938 return vma; 939 } 940 941 err = i915_vma_pin(vma, 0, 0, PIN_USER); 942 if (err) { 943 i915_gem_object_put(obj); 944 return ERR_PTR(err); 945 } 946 947 return vma; 948 } 949 950 static u32 safe_poison(u32 offset, u32 poison) 951 { 952 /* 953 * Do not enable predication as it will nop all subsequent commands, 954 * not only disabling the tests (by preventing all the other SRM) but 955 * also preventing the arbitration events at the end of the request. 956 */ 957 if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0))) 958 poison &= ~REG_BIT(0); 959 960 return poison; 961 } 962 963 static struct i915_vma * 964 store_context(struct intel_context *ce, struct i915_vma *scratch) 965 { 966 struct i915_vma *batch; 967 u32 dw, x, *cs, *hw; 968 u32 *defaults; 969 970 batch = create_user_vma(ce->vm, SZ_64K); 971 if (IS_ERR(batch)) 972 return batch; 973 974 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC); 975 if (IS_ERR(cs)) { 976 i915_vma_put(batch); 977 return ERR_CAST(cs); 978 } 979 980 defaults = shmem_pin_map(ce->engine->default_state); 981 if (!defaults) { 982 i915_gem_object_unpin_map(batch->obj); 983 i915_vma_put(batch); 984 return ERR_PTR(-ENOMEM); 985 } 986 987 x = 0; 988 dw = 0; 989 hw = defaults; 990 hw += LRC_STATE_OFFSET / sizeof(*hw); 991 do { 992 u32 len = hw[dw] & LRI_LENGTH_MASK; 993 994 /* 995 * Keep it simple, skip parsing complex commands 996 * 997 * At present, there are no more MI_LOAD_REGISTER_IMM 998 * commands after the first 3D state command. Rather 999 * than include a table (see i915_cmd_parser.c) of all 1000 * the possible commands and their instruction lengths 1001 * (or mask for variable length instructions), assume 1002 * we have gathered the complete list of registers and 1003 * bail out. 
1004 */ 1005 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) 1006 break; 1007 1008 if (hw[dw] == 0) { 1009 dw++; 1010 continue; 1011 } 1012 1013 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { 1014 /* Assume all other MI commands match LRI length mask */ 1015 dw += len + 2; 1016 continue; 1017 } 1018 1019 if (!len) { 1020 pr_err("%s: invalid LRI found in context image\n", 1021 ce->engine->name); 1022 igt_hexdump(defaults, PAGE_SIZE); 1023 break; 1024 } 1025 1026 dw++; 1027 len = (len + 1) / 2; 1028 while (len--) { 1029 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 1030 *cs++ = hw[dw]; 1031 *cs++ = lower_32_bits(i915_vma_offset(scratch) + x); 1032 *cs++ = upper_32_bits(i915_vma_offset(scratch) + x); 1033 1034 dw += 2; 1035 x += 4; 1036 } 1037 } while (dw < PAGE_SIZE / sizeof(u32) && 1038 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 1039 1040 *cs++ = MI_BATCH_BUFFER_END; 1041 1042 shmem_unpin_map(ce->engine->default_state, defaults); 1043 1044 i915_gem_object_flush_map(batch->obj); 1045 i915_gem_object_unpin_map(batch->obj); 1046 1047 return batch; 1048 } 1049 1050 static struct i915_request * 1051 record_registers(struct intel_context *ce, 1052 struct i915_vma *before, 1053 struct i915_vma *after, 1054 u32 *sema) 1055 { 1056 struct i915_vma *b_before, *b_after; 1057 struct i915_request *rq; 1058 u32 *cs; 1059 int err; 1060 1061 b_before = store_context(ce, before); 1062 if (IS_ERR(b_before)) 1063 return ERR_CAST(b_before); 1064 1065 b_after = store_context(ce, after); 1066 if (IS_ERR(b_after)) { 1067 rq = ERR_CAST(b_after); 1068 goto err_before; 1069 } 1070 1071 rq = intel_context_create_request(ce); 1072 if (IS_ERR(rq)) 1073 goto err_after; 1074 1075 err = igt_vma_move_to_active_unlocked(before, rq, EXEC_OBJECT_WRITE); 1076 if (err) 1077 goto err_rq; 1078 1079 err = igt_vma_move_to_active_unlocked(b_before, rq, 0); 1080 if (err) 1081 goto err_rq; 1082 1083 err = igt_vma_move_to_active_unlocked(after, rq, EXEC_OBJECT_WRITE); 1084 if (err) 1085 goto err_rq; 1086 1087 err 
= igt_vma_move_to_active_unlocked(b_after, rq, 0); 1088 if (err) 1089 goto err_rq; 1090 1091 cs = intel_ring_begin(rq, 14); 1092 if (IS_ERR(cs)) { 1093 err = PTR_ERR(cs); 1094 goto err_rq; 1095 } 1096 1097 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 1098 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 1099 *cs++ = lower_32_bits(i915_vma_offset(b_before)); 1100 *cs++ = upper_32_bits(i915_vma_offset(b_before)); 1101 1102 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1103 *cs++ = MI_SEMAPHORE_WAIT | 1104 MI_SEMAPHORE_GLOBAL_GTT | 1105 MI_SEMAPHORE_POLL | 1106 MI_SEMAPHORE_SAD_NEQ_SDD; 1107 *cs++ = 0; 1108 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 1109 offset_in_page(sema); 1110 *cs++ = 0; 1111 *cs++ = MI_NOOP; 1112 1113 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 1114 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 1115 *cs++ = lower_32_bits(i915_vma_offset(b_after)); 1116 *cs++ = upper_32_bits(i915_vma_offset(b_after)); 1117 1118 intel_ring_advance(rq, cs); 1119 1120 WRITE_ONCE(*sema, 0); 1121 i915_request_get(rq); 1122 i915_request_add(rq); 1123 err_after: 1124 i915_vma_put(b_after); 1125 err_before: 1126 i915_vma_put(b_before); 1127 return rq; 1128 1129 err_rq: 1130 i915_request_add(rq); 1131 rq = ERR_PTR(err); 1132 goto err_after; 1133 } 1134 1135 static struct i915_vma *load_context(struct intel_context *ce, u32 poison) 1136 { 1137 struct i915_vma *batch; 1138 u32 dw, *cs, *hw; 1139 u32 *defaults; 1140 1141 batch = create_user_vma(ce->vm, SZ_64K); 1142 if (IS_ERR(batch)) 1143 return batch; 1144 1145 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC); 1146 if (IS_ERR(cs)) { 1147 i915_vma_put(batch); 1148 return ERR_CAST(cs); 1149 } 1150 1151 defaults = shmem_pin_map(ce->engine->default_state); 1152 if (!defaults) { 1153 i915_gem_object_unpin_map(batch->obj); 1154 i915_vma_put(batch); 1155 return ERR_PTR(-ENOMEM); 1156 } 1157 1158 dw = 0; 1159 hw = defaults; 1160 hw += LRC_STATE_OFFSET / sizeof(*hw); 1161 do { 1162 u32 len = hw[dw] & LRI_LENGTH_MASK; 1163 1164 
/* For simplicity, break parsing at the first complex command */ 1165 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) 1166 break; 1167 1168 if (hw[dw] == 0) { 1169 dw++; 1170 continue; 1171 } 1172 1173 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { 1174 dw += len + 2; 1175 continue; 1176 } 1177 1178 if (!len) { 1179 pr_err("%s: invalid LRI found in context image\n", 1180 ce->engine->name); 1181 igt_hexdump(defaults, PAGE_SIZE); 1182 break; 1183 } 1184 1185 dw++; 1186 len = (len + 1) / 2; 1187 *cs++ = MI_LOAD_REGISTER_IMM(len); 1188 while (len--) { 1189 *cs++ = hw[dw]; 1190 *cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine, 1191 MI_LRI_LRM_CS_MMIO), 1192 poison); 1193 dw += 2; 1194 } 1195 } while (dw < PAGE_SIZE / sizeof(u32) && 1196 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 1197 1198 *cs++ = MI_BATCH_BUFFER_END; 1199 1200 shmem_unpin_map(ce->engine->default_state, defaults); 1201 1202 i915_gem_object_flush_map(batch->obj); 1203 i915_gem_object_unpin_map(batch->obj); 1204 1205 return batch; 1206 } 1207 1208 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) 1209 { 1210 struct i915_request *rq; 1211 struct i915_vma *batch; 1212 u32 *cs; 1213 int err; 1214 1215 batch = load_context(ce, poison); 1216 if (IS_ERR(batch)) 1217 return PTR_ERR(batch); 1218 1219 rq = intel_context_create_request(ce); 1220 if (IS_ERR(rq)) { 1221 err = PTR_ERR(rq); 1222 goto err_batch; 1223 } 1224 1225 err = igt_vma_move_to_active_unlocked(batch, rq, 0); 1226 if (err) 1227 goto err_rq; 1228 1229 cs = intel_ring_begin(rq, 8); 1230 if (IS_ERR(cs)) { 1231 err = PTR_ERR(cs); 1232 goto err_rq; 1233 } 1234 1235 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 1236 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 1237 *cs++ = lower_32_bits(i915_vma_offset(batch)); 1238 *cs++ = upper_32_bits(i915_vma_offset(batch)); 1239 1240 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1241 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 1242 offset_in_page(sema); 1243 *cs++ = 0; 
1244 *cs++ = 1; 1245 1246 intel_ring_advance(rq, cs); 1247 1248 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 1249 err_rq: 1250 i915_request_add(rq); 1251 err_batch: 1252 i915_vma_put(batch); 1253 return err; 1254 } 1255 1256 static bool is_moving(u32 a, u32 b) 1257 { 1258 return a != b; 1259 } 1260 1261 static int compare_isolation(struct intel_engine_cs *engine, 1262 struct i915_vma *ref[2], 1263 struct i915_vma *result[2], 1264 struct intel_context *ce, 1265 u32 poison) 1266 { 1267 u32 x, dw, *hw, *lrc; 1268 u32 *A[2], *B[2]; 1269 u32 *defaults; 1270 int err = 0; 1271 1272 A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC); 1273 if (IS_ERR(A[0])) 1274 return PTR_ERR(A[0]); 1275 1276 A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC); 1277 if (IS_ERR(A[1])) { 1278 err = PTR_ERR(A[1]); 1279 goto err_A0; 1280 } 1281 1282 B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC); 1283 if (IS_ERR(B[0])) { 1284 err = PTR_ERR(B[0]); 1285 goto err_A1; 1286 } 1287 1288 B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC); 1289 if (IS_ERR(B[1])) { 1290 err = PTR_ERR(B[1]); 1291 goto err_B0; 1292 } 1293 1294 lrc = i915_gem_object_pin_map_unlocked(ce->state->obj, 1295 intel_gt_coherent_map_type(engine->gt, 1296 ce->state->obj, 1297 false)); 1298 if (IS_ERR(lrc)) { 1299 err = PTR_ERR(lrc); 1300 goto err_B1; 1301 } 1302 lrc += LRC_STATE_OFFSET / sizeof(*hw); 1303 1304 defaults = shmem_pin_map(ce->engine->default_state); 1305 if (!defaults) { 1306 err = -ENOMEM; 1307 goto err_lrc; 1308 } 1309 1310 x = 0; 1311 dw = 0; 1312 hw = defaults; 1313 hw += LRC_STATE_OFFSET / sizeof(*hw); 1314 do { 1315 u32 len = hw[dw] & LRI_LENGTH_MASK; 1316 1317 /* For simplicity, break parsing at the first complex command */ 1318 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) 1319 break; 1320 1321 if (hw[dw] == 0) { 1322 dw++; 1323 continue; 1324 } 1325 1326 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { 1327 dw += len + 2; 1328 
continue;
		}

		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       engine->name);
			igt_hexdump(defaults, PAGE_SIZE);
			break;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			if (!is_moving(A[0][x], A[1][x]) &&
			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
				switch (hw[dw] & 4095) {
				case 0x30: /* RING_HEAD */
				case 0x34: /* RING_TAIL */
					break;

				default:
					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
					       engine->name, dw,
					       hw[dw], hw[dw + 1],
					       A[0][x], B[0][x], B[1][x],
					       poison, lrc[dw + 1]);
					err = -EINVAL;
				}
			}
			dw += 2;
			x++;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	shmem_unpin_map(ce->engine->default_state, defaults);
err_lrc:
	i915_gem_object_unpin_map(ce->state->obj);
err_B1:
	i915_gem_object_unpin_map(result[1]->obj);
err_B0:
	i915_gem_object_unpin_map(result[0]->obj);
err_A1:
	i915_gem_object_unpin_map(ref[1]->obj);
err_A0:
	i915_gem_object_unpin_map(ref[0]->obj);
	return err;
}

/*
 * Create a vma of @sz bytes in @vm (via create_user_vma()) and fill its
 * backing object with POISON_INUSE so that later writes into it can be
 * told apart from the initial contents.
 *
 * Returns the vma on success, or an ERR_PTR if either the vma creation or
 * the mapping of its object fails. On a mapping failure the vma reference
 * is dropped before returning.
 */
static struct i915_vma *
create_result_vma(struct i915_address_space *vm, unsigned long sz)
{
	struct i915_vma *vma;
	void *ptr;

	vma = create_user_vma(vm, sz);
	if (IS_ERR(vma))
		return vma;

	/* Set the results to a known value distinct from the poison */
	ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
	if (IS_ERR(ptr)) {
		i915_vma_put(vma);
		return ERR_CAST(ptr);
	}

	/* Fill the whole vma, flush the writes, then drop the mapping again */
	memset(ptr, POISON_INUSE, vma->size);
	i915_gem_object_flush_map(vma->obj);
	i915_gem_object_unpin_map(vma->obj);

	return vma;
}

/*
 * Run one isolation pass on @engine using @poison as the value handed to
 * poison_registers().
 *
 * Sequence, as visible here:
 *  1. Create two contexts, A and B, on @engine.
 *  2. Call record_registers() on A with the ref[] pair, release the
 *     semaphore (write 1) and wait up to HZ / 2 for that request.
 *  3. Call record_registers() on A again with the result[] pair, then call
 *     poison_registers() on B, and wait up to HZ / 2 for the recording.
 *  4. Write -1 to the semaphore and, if nothing failed, hand both pairs to
 *     compare_isolation().
 *
 * All four buffers are created in A's address space. The semaphore is a
 * single u32 located at engine->status_page.addr + 1000, zeroed on entry.
 *
 * Returns 0 on success, -ETIME if either wait times out, or the error
 * reported by the failing helper.
 */
static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
	struct i915_vma *ref[2], *result[2];
	struct intel_context *A, *B;
	struct i915_request *rq;
	int err;

	A = intel_context_create(engine);
	if (IS_ERR(A))
		return PTR_ERR(A);

	B = intel_context_create(engine);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto err_A;
	}

	ref[0] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[0])) {
		err = PTR_ERR(ref[0]);
		goto err_B;
	}

	ref[1] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[1])) {
		err = PTR_ERR(ref[1]);
		goto err_ref0;
	}

	/* Reference pass: no other context is touching the registers yet */
	rq = record_registers(A, ref[0], ref[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ref1;
	}

	/* Release the semaphore and make the write visible before waiting */
	WRITE_ONCE(*sema, 1);
	wmb();

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_ref1;
	}
	i915_request_put(rq);

	result[0] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(result[0])) {
		err = PTR_ERR(result[0]);
		goto err_ref1;
	}

	result[1] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(result[1])) {
		err = PTR_ERR(result[1]);
		goto err_result0;
	}

	/* Result pass: this time context B is asked to poison the registers */
	rq = record_registers(A, result[0], result[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_result1;
	}

	/* Only wait for the recording if the poisoning was submitted */
	err = poison_registers(B, poison, sema);
	if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) {
		pr_err("%s(%s): wait for results timed out\n",
		       __func__, engine->name);
		err = -ETIME;
	}

	/* Always cancel the semaphore wait, just in case the GPU gets stuck */
	WRITE_ONCE(*sema, -1);
	i915_request_put(rq);
	if (err)
		goto err_result1;

	err = compare_isolation(engine, ref, result, A, poison);

	/* Unwind in reverse order of acquisition; also the success path */
err_result1:
	i915_vma_put(result[1]);
err_result0:
	i915_vma_put(result[0]);
err_ref1:
	i915_vma_put(ref[1]);
err_ref0:
	i915_vma_put(ref[0]);
err_B:
	intel_context_put(B);
err_A:
	intel_context_put(A);
	return err;
}

/*
 * Report whether the isolation test should be skipped for @engine: true
 * for the copy engine class on graphics version 9 and for the render
 * class on graphics version 11, false otherwise.
 */
static bool skip_isolation(const struct intel_engine_cs *engine)
{
	if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
		return true;

	if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
		return true;

	return false;
}

/*
 * Selftest entry point; @arg is the struct intel_gt under test.
 *
 * Runs __lrc_isolation() on every engine for each value in poison[] and
 * for its bitwise complement. The first error seen is the one returned,
 * but the remaining values are still exercised. A failing
 * igt_flush_test() ends the test with -EIO.
 */
static int live_lrc_isolation(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	const u32 poison[] = {
		STACK_MAGIC,
		0x3a3a3a3a,
		0x5c5c5c5c,
		0xffffffff,
		0xffff0000,
	};
	int err = 0;

	/*
	 * Our goal is to try and verify that per-context state cannot be
	 * tampered with by another non-privileged client.
	 *
	 * We take the list of context registers from the LRI in the default
	 * context image and attempt to modify that list from a remote context.
	 */

	for_each_engine(engine, gt, id) {
		int i;

		/* Just don't even ask */
		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
		    skip_isolation(engine))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < ARRAY_SIZE(poison); i++) {
			int result;

			/* Remember only the first failure, but keep going */
			result = __lrc_isolation(engine, poison[i]);
			if (result && !err)
				err = result;

			result = __lrc_isolation(engine, ~poison[i]);
			if (result && !err)
				err = result;
		}
		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

/*
 * Create a request on @ce, submit it and wait up to HZ / 5 for it.
 *
 * Returns 0 on success, -ETIME if the wait fails, or the error from
 * intel_context_create_request().
 */
static int wabb_ctx_submit_req(struct intel_context *ce)
{
	struct i915_request *rq;
	int err = 0;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* Hold our own reference so the request can be waited upon after add */
	i915_request_get(rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -ETIME;

	i915_request_put(rq);

	return err;
}

/* Byte offset, and matching u32 index, of the canary within a wa bb page */
#define CTX_BB_CANARY_OFFSET (3 * 1024)
#define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))

/*
 * Emit four dwords at @cs: an MI_STORE_REGISTER_MEM_GEN8 of RING_START(0)
 * (with MI_LRI_LRM_CS_MMIO set) targeting the canary slot in the GGTT.
 *
 * The destination is the GGTT offset of ce->state plus
 * context_wa_bb_offset(ce) plus CTX_BB_CANARY_OFFSET; when @per_ctx is
 * true a further PAGE_SIZE is added.
 *
 * Returns the command stream pointer advanced past the emitted dwords.
 */
static u32 *
emit_wabb_ctx_canary(const struct intel_context *ce,
		     u32 *cs, bool per_ctx)
{
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(RING_START(0));
	*cs++ = i915_ggtt_offset(ce->state) +
		context_wa_bb_offset(ce) +
		CTX_BB_CANARY_OFFSET +
		(per_ctx ? PAGE_SIZE : 0);
	*cs++ = 0;

	return cs;
}

/* emit_wabb_ctx_canary() with per_ctx == false */
static u32 *
emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
{
	return emit_wabb_ctx_canary(ce, cs, false);
}

/* emit_wabb_ctx_canary() with per_ctx == true */
static u32 *
emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
{
	return emit_wabb_ctx_canary(ce, cs, true);
}

/*
 * Seed the canary slot of the buffer returned by context_wabb(@ce,
 * @per_ctx) with 0xdeadf00d, then install the matching canary emitter
 * through setup_per_ctx_bb() or setup_indirect_ctx_bb().
 */
static void
wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
{
	u32 *cs = context_wabb(ce, per_ctx);

	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;

	if (per_ctx)
		setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary);
	else
		setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
}

/*
 * Compare the canary dword with ce->lrc_reg_state[CTX_RING_START].
 *
 * The canary is located from lrc_reg_state by stepping back
 * LRC_STATE_OFFSET and forward by context_wa_bb_offset(ce), plus
 * PAGE_SIZE when @per_ctx is true, i.e. the same offsets that
 * emit_wabb_ctx_canary() adds to the GGTT offset of ce->state.
 *
 * Returns true if the two values match; otherwise logs both and returns
 * false.
 */
static bool check_ring_start(struct intel_context *ce, bool per_ctx)
{
	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
		LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
		(per_ctx ? PAGE_SIZE : 0);

	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
		return true;

	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
	       ctx_bb[CTX_BB_CANARY_INDEX],
	       ce->lrc_reg_state[CTX_RING_START]);

	return false;
}

/*
 * Submit a request on @ce and then verify its canary.
 *
 * Returns 0 on success, the error from wabb_ctx_submit_req(), or -EINVAL
 * if check_ring_start() reports a mismatch.
 */
static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
{
	int err;

	err = wabb_ctx_submit_req(ce);
	if (err)
		return err;

	if (!check_ring_start(ce, per_ctx))
		return -EINVAL;

	return 0;
}

/*
 * Exercise the context batch buffer selected by @per_ctx on @engine using
 * two freshly created and pinned contexts.
 *
 * Returns 0 on success or when context a has no wa_bb_page (nothing to
 * test), otherwise a negative error code.
 */
static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
{
	struct intel_context *a, *b;
	int err;

	a = intel_context_create(engine);
	if (IS_ERR(a))
		return PTR_ERR(a);
	err = intel_context_pin(a);
	if (err)
		goto put_a;

	b = intel_context_create(engine);
	if (IS_ERR(b)) {
		err = PTR_ERR(b);
		goto unpin_a;
	}
	err = intel_context_pin(b);
	if (err)
		goto put_b;

	/* We use the already reserved extra page in context state */
	if (!a->wa_bb_page) {
		GEM_BUG_ON(b->wa_bb_page);
		GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
		/* err is 0 here (pin of b succeeded), so this is a clean skip */
		goto unpin_b;
	}

	/*
	 * In order to test that our per context bb is truly per context,
	 * and executes at the intended spot in the context restore process,
	 * make the batch store the ring start value to memory.
	 * As ring start is restored prior to starting the indirect ctx bb and
	 * as it will be different for each context, it fits this purpose.
	 */
	wabb_ctx_setup(a, per_ctx);
	wabb_ctx_setup(b, per_ctx);

	err = wabb_ctx_check(a, per_ctx);
	if (err)
		goto unpin_b;

	err = wabb_ctx_check(b, per_ctx);

unpin_b:
	intel_context_unpin(b);
put_b:
	intel_context_put(b);
unpin_a:
	intel_context_unpin(a);
put_a:
	intel_context_put(a);

	return err;
}

/*
 * Run __lrc_wabb_ctx() on every engine of the gt passed in @arg, holding
 * an engine pm reference around each run. A failing igt_flush_test()
 * overrides the result with -EIO; the loop stops at the first error.
 */
static int lrc_wabb_ctx(void *arg, bool per_ctx)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);
		err = __lrc_wabb_ctx(engine, per_ctx);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;

		if (err)
			break;
	}

	return err;
}

/* Selftest entry point: lrc_wabb_ctx() with per_ctx == false */
static int live_lrc_indirect_ctx_bb(void *arg)
{
	return lrc_wabb_ctx(arg, false);
}

/* Selftest entry point: lrc_wabb_ctx() with per_ctx == true */
static int live_lrc_per_ctx_bb(void *arg)
{
	return lrc_wabb_ctx(arg, true);
}

/*
 * Attempt an engine reset on behalf of @rq.
 *
 * With bottom halves disabled, try to claim this engine's reset bit
 * (I915_RESET_ENGINE + engine->id) in gt->reset.flags. If the bit was
 * already set, nothing is done. Otherwise the submission tasklet is
 * disabled, __intel_engine_reset_bh() is called unless @rq already has a
 * fence error, and then the tasklet is re-enabled and the bit is cleared,
 * waking any waiters.
 */
static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
{
	const unsigned int bit = I915_RESET_ENGINE + engine->id;
	unsigned long *lock = &engine->gt->reset.flags;

	local_bh_disable();
	if (!test_and_set_bit(bit, lock)) {
		tasklet_disable(&engine->sched_engine->tasklet);

		/* Skip the reset if the request has already been marked in error */
		if (!rq->fence.error)
			__intel_engine_reset_bh(engine, NULL);

		tasklet_enable(&engine->sched_engine->tasklet);
		clear_and_wake_up_bit(bit, lock);
	}
	local_bh_enable();
}

/*
 * Pin @ce, overwrite its register state (ce->lrc_reg_state, for
 * engine->context_size - LRC_STATE_OFFSET bytes) with pseudo-random bytes
 * drawn from @prng, then create and submit a request on it.
 *
 * Returns the submitted request with an extra reference held for the
 * caller, or an ERR_PTR on failure (in which case the context is unpinned
 * if the request could not be created).
 *
 * NOTE(review): on success the pin taken here is not dropped within this
 * function -- confirm where it is expected to be released.
 */
static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(err);

	prandom_bytes_state(prng,
			    ce->lrc_reg_state,
			    ce->engine->context_size -
			    LRC_STATE_OFFSET);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	return rq;

err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(err);
}

/*
 * Submit one request from a context whose register state has been
 * scrambled by garbage(), then ban the context, reset the engine and
 * check the outcome.
 *
 * Returns 0 on success, -ETIME if the request is not seen as submitted
 * within HZ / 2, -EINVAL if the request carries no fence error after the
 * reset, -EIO if the request does not complete within HZ / 2 afterwards,
 * or the error from context/request creation.
 */
static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct intel_context *ce;
	struct i915_request *hang;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	hang = garbage(ce, prng);
	if (IS_ERR(hang)) {
		err = PTR_ERR(hang);
		goto err_ce;
	}

	if (wait_for_submit(engine, hang, HZ / 2)) {
		i915_request_put(hang);
		err = -ETIME;
		goto err_ce;
	}

	intel_context_set_banned(ce);
	garbage_reset(engine, hang);

	/* The reset is expected to have flagged the request with an error */
	intel_engine_flush_submission(engine);
	if (!hang->fence.error) {
		i915_request_put(hang);
		pr_err("%s: corrupted context was not reset\n",
		       engine->name);
		err = -EINVAL;
		goto err_ce;
	}

	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
		pr_err("%s: corrupted context did not recover\n",
		       engine->name);
		i915_request_put(hang);
		err = -EIO;
		goto err_ce;
	}
	i915_request_put(hang);

err_ce:
	intel_context_put(ce);
	return err;
}

/*
 * Selftest entry point; @arg is the struct intel_gt under test.
 *
 * Does nothing (returns 0) unless CONFIG_DRM_I915_SELFTEST_BROKEN is
 * enabled. Otherwise runs __lrc_garbage() three times on each engine,
 * skipping engines whose gt lacks engine reset support, and returns the
 * first error (or -EIO if igt_flush_test() fails).
 */
static int live_lrc_garbage(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Verify that we can recover if one context state is completely
	 * corrupted.
	 */

	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
		return 0;

	for_each_engine(engine, gt, id) {
		I915_RND_STATE(prng);
		int err = 0, i;

		if (!intel_has_reset_engine(engine->gt))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < 3; i++) {
			err = __lrc_garbage(engine, &prng);
			if (err)
				break;
		}
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

/*
 * Submit requests on a fresh context on @engine in batches of 1024 until
 * the selftest timeout expires, wait for the last one, and then check the
 * context's runtime underflow counters.
 *
 * Returns 0 on success, -EOVERFLOW if ce->stats.runtime.num_underflow is
 * non-zero afterwards, or a negative error from request creation or from
 * the final wait.
 */
static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	IGT_TIMEOUT(end_time);
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	/* Start from clean counters so only this run is measured */
	ce->stats.runtime.num_underflow = 0;
	ce->stats.runtime.max_underflow = 0;

	do {
		unsigned int loop = 1024;

		while (loop) {
			rq = intel_context_create_request(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_rq;
			}

			/* Keep a reference only on the last request of the batch */
			if (--loop == 0)
				i915_request_get(rq);

			i915_request_add(rq);
		}

		/* On timeout, leave with the reference on rq still held */
		if (__igt_timeout(end_time, NULL))
			break;

		i915_request_put(rq);
	} while (1);

	err = i915_request_wait(rq, 0, HZ / 5);
	if (err < 0) {
		pr_err("%s: request not completed!\n", engine->name);
		goto err_wait;
	}

	/* NOTE(review): the return value of igt_flush_test() is ignored here */
	igt_flush_test(engine->i915);

	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
		engine->name,
		intel_context_get_total_runtime_ns(ce),
		intel_context_get_avg_runtime_ns(ce));

	/* Discard the positive return from i915_request_wait() */
	err = 0;
	if (ce->stats.runtime.num_underflow) {
		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
		       engine->name,
		       ce->stats.runtime.num_underflow,
		       ce->stats.runtime.max_underflow);
		GEM_TRACE_DUMP();
		err = -EOVERFLOW;
	}

err_wait:
	i915_request_put(rq);
err_rq:
	intel_context_put(ce);
	return err;
}

/*
 * Selftest entry point; @arg is the struct intel_gt under test.
 *
 * Runs __live_pphwsp_runtime() on each engine, stopping at the first
 * error, then flushes once; a failing igt_flush_test() turns the result
 * into -EIO.
 */
static int live_pphwsp_runtime(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that cumulative context runtime as stored in the pphwsp[16]
	 * is monotonic.
	 */

	for_each_engine(engine, gt, id) {
		err = __live_pphwsp_runtime(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

/*
 * Register and run the live LRC selftests against the gt returned by
 * to_gt(@i915). Returns 0 without running anything if the device does
 * not have logical ring contexts; otherwise returns the result of
 * intel_gt_live_subtests().
 */
int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_lrc_layout),
		SUBTEST(live_lrc_fixed),
		SUBTEST(live_lrc_state),
		SUBTEST(live_lrc_gpr),
		SUBTEST(live_lrc_isolation),
		SUBTEST(live_lrc_timestamp),
		SUBTEST(live_lrc_garbage),
		SUBTEST(live_pphwsp_runtime),
		SUBTEST(live_lrc_indirect_ctx_bb),
		SUBTEST(live_lrc_per_ctx_bb),
	};

	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
		return 0;

	return intel_gt_live_subtests(tests, to_gt(i915));
}