/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
#include "gt/selftest_engine_heartbeat.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */

static struct i915_vma *create_scratch(struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

static bool is_active(struct i915_request *rq)
{
	if (i915_request_is_active(rq))
		return true;

	if (i915_request_on_hold(rq))
		return true;

	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
		return true;

	return false;
}

static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	timeout += jiffies;
	do {
		bool done = time_after(jiffies, timeout);

		if (i915_request_completed(rq)) /* that was quick! */
			return 0;

		/* Wait until the HW has acknowledged the submission (or err) */
		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
			return 0;

		if (done)
			return -ETIME;

		cond_resched();
	} while (1);
}

static int wait_for_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq,
			  unsigned long timeout)
{
	timeout += jiffies;

	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_completed(rq))
			break;

		if (READ_ONCE(rq->fence.error))
			break;
	} while (time_before(jiffies, timeout));

	flush_scheduled_work();

	if (rq->fence.error != -EIO) {
		pr_err("%s: hanging request %llx:%lld not reset\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -EINVAL;
	}

	/* Give the request a jiffie to complete after flushing the worker */
	if (i915_request_wait(rq, 0,
			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
		pr_err("%s: hanging request %llx:%lld did not complete\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -ETIME;
	}

	return 0;
}

static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin, rq)) {
			GEM_TRACE("spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out_ctx;
		}

		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto out_ctx;
		}

out_ctx:
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_unlite_restore(struct intel_gt *gt, int prio)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = -ENOMEM;

	/*
	 * Check that we can correctly context switch between 2 instances
	 * on the same engine from the same parent context.
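	 *
	 * Background: a "lite restore" is when the HW resumes the context
	 * it is already running, so only RING_TAIL is updated rather than
	 * the whole context image being reloaded. The rings below are
	 * deliberately poisoned so that a bogus lite-restore into the
	 * wrong ring shows up as a hang.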
200 */ 201 202 if (igt_spinner_init(&spin, gt)) 203 return err; 204 205 err = 0; 206 for_each_engine(engine, gt, id) { 207 struct intel_context *ce[2] = {}; 208 struct i915_request *rq[2]; 209 struct igt_live_test t; 210 int n; 211 212 if (prio && !intel_engine_has_preemption(engine)) 213 continue; 214 215 if (!intel_engine_can_store_dword(engine)) 216 continue; 217 218 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 219 err = -EIO; 220 break; 221 } 222 st_engine_heartbeat_disable(engine); 223 224 for (n = 0; n < ARRAY_SIZE(ce); n++) { 225 struct intel_context *tmp; 226 227 tmp = intel_context_create(engine); 228 if (IS_ERR(tmp)) { 229 err = PTR_ERR(tmp); 230 goto err_ce; 231 } 232 233 err = intel_context_pin(tmp); 234 if (err) { 235 intel_context_put(tmp); 236 goto err_ce; 237 } 238 239 /* 240 * Setup the pair of contexts such that if we 241 * lite-restore using the RING_TAIL from ce[1] it 242 * will execute garbage from ce[0]->ring. 243 */ 244 memset(tmp->ring->vaddr, 245 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ 246 tmp->ring->vma->size); 247 248 ce[n] = tmp; 249 } 250 GEM_BUG_ON(!ce[1]->ring->size); 251 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); 252 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head); 253 254 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 255 if (IS_ERR(rq[0])) { 256 err = PTR_ERR(rq[0]); 257 goto err_ce; 258 } 259 260 i915_request_get(rq[0]); 261 i915_request_add(rq[0]); 262 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); 263 264 if (!igt_wait_for_spinner(&spin, rq[0])) { 265 i915_request_put(rq[0]); 266 goto err_ce; 267 } 268 269 rq[1] = i915_request_create(ce[1]); 270 if (IS_ERR(rq[1])) { 271 err = PTR_ERR(rq[1]); 272 i915_request_put(rq[0]); 273 goto err_ce; 274 } 275 276 if (!prio) { 277 /* 278 * Ensure we do the switch to ce[1] on completion. 279 * 280 * rq[0] is already submitted, so this should reduce 281 * to a no-op (a wait on a request on the same engine 282 * uses the submit fence, not the completion fence), 283 * but it will install a dependency on rq[1] for rq[0] 284 * that will prevent the pair being reordered by 285 * timeslicing. 
286 */ 287 i915_request_await_dma_fence(rq[1], &rq[0]->fence); 288 } 289 290 i915_request_get(rq[1]); 291 i915_request_add(rq[1]); 292 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); 293 i915_request_put(rq[0]); 294 295 if (prio) { 296 struct i915_sched_attr attr = { 297 .priority = prio, 298 }; 299 300 /* Alternatively preempt the spinner with ce[1] */ 301 engine->schedule(rq[1], &attr); 302 } 303 304 /* And switch back to ce[0] for good measure */ 305 rq[0] = i915_request_create(ce[0]); 306 if (IS_ERR(rq[0])) { 307 err = PTR_ERR(rq[0]); 308 i915_request_put(rq[1]); 309 goto err_ce; 310 } 311 312 i915_request_await_dma_fence(rq[0], &rq[1]->fence); 313 i915_request_get(rq[0]); 314 i915_request_add(rq[0]); 315 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); 316 i915_request_put(rq[1]); 317 i915_request_put(rq[0]); 318 319 err_ce: 320 intel_engine_flush_submission(engine); 321 igt_spinner_end(&spin); 322 for (n = 0; n < ARRAY_SIZE(ce); n++) { 323 if (IS_ERR_OR_NULL(ce[n])) 324 break; 325 326 intel_context_unpin(ce[n]); 327 intel_context_put(ce[n]); 328 } 329 330 st_engine_heartbeat_enable(engine); 331 if (igt_live_test_end(&t)) 332 err = -EIO; 333 if (err) 334 break; 335 } 336 337 igt_spinner_fini(&spin); 338 return err; 339 } 340 341 static int live_unlite_switch(void *arg) 342 { 343 return live_unlite_restore(arg, 0); 344 } 345 346 static int live_unlite_preempt(void *arg) 347 { 348 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); 349 } 350 351 static int live_unlite_ring(void *arg) 352 { 353 struct intel_gt *gt = arg; 354 struct intel_engine_cs *engine; 355 struct igt_spinner spin; 356 enum intel_engine_id id; 357 int err = 0; 358 359 /* 360 * Setup a preemption event that will cause almost the entire ring 361 * to be unwound, potentially fooling our intel_ring_direction() 362 * into emitting a forward lite-restore instead of the rollback. 
363 */ 364 365 if (igt_spinner_init(&spin, gt)) 366 return -ENOMEM; 367 368 for_each_engine(engine, gt, id) { 369 struct intel_context *ce[2] = {}; 370 struct i915_request *rq; 371 struct igt_live_test t; 372 int n; 373 374 if (!intel_engine_has_preemption(engine)) 375 continue; 376 377 if (!intel_engine_can_store_dword(engine)) 378 continue; 379 380 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 381 err = -EIO; 382 break; 383 } 384 st_engine_heartbeat_disable(engine); 385 386 for (n = 0; n < ARRAY_SIZE(ce); n++) { 387 struct intel_context *tmp; 388 389 tmp = intel_context_create(engine); 390 if (IS_ERR(tmp)) { 391 err = PTR_ERR(tmp); 392 goto err_ce; 393 } 394 395 err = intel_context_pin(tmp); 396 if (err) { 397 intel_context_put(tmp); 398 goto err_ce; 399 } 400 401 memset32(tmp->ring->vaddr, 402 0xdeadbeef, /* trigger a hang if executed */ 403 tmp->ring->vma->size / sizeof(u32)); 404 405 ce[n] = tmp; 406 } 407 408 /* Create max prio spinner, followed by N low prio nops */ 409 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 410 if (IS_ERR(rq)) { 411 err = PTR_ERR(rq); 412 goto err_ce; 413 } 414 415 i915_request_get(rq); 416 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 417 i915_request_add(rq); 418 419 if (!igt_wait_for_spinner(&spin, rq)) { 420 intel_gt_set_wedged(gt); 421 i915_request_put(rq); 422 err = -ETIME; 423 goto err_ce; 424 } 425 426 /* Fill the ring, until we will cause a wrap */ 427 n = 0; 428 while (intel_ring_direction(ce[0]->ring, 429 rq->wa_tail, 430 ce[0]->ring->tail) <= 0) { 431 struct i915_request *tmp; 432 433 tmp = intel_context_create_request(ce[0]); 434 if (IS_ERR(tmp)) { 435 err = PTR_ERR(tmp); 436 i915_request_put(rq); 437 goto err_ce; 438 } 439 440 i915_request_add(tmp); 441 intel_engine_flush_submission(engine); 442 n++; 443 } 444 intel_engine_flush_submission(engine); 445 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", 446 engine->name, n, 447 ce[0]->ring->size, 448 ce[0]->ring->tail, 449 ce[0]->ring->emit, 450 rq->tail); 451 GEM_BUG_ON(intel_ring_direction(ce[0]->ring, 452 rq->tail, 453 ce[0]->ring->tail) <= 0); 454 i915_request_put(rq); 455 456 /* Create a second ring to preempt the first ring after rq[0] */ 457 rq = intel_context_create_request(ce[1]); 458 if (IS_ERR(rq)) { 459 err = PTR_ERR(rq); 460 goto err_ce; 461 } 462 463 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 464 i915_request_get(rq); 465 i915_request_add(rq); 466 467 err = wait_for_submit(engine, rq, HZ / 2); 468 i915_request_put(rq); 469 if (err) { 470 pr_err("%s: preemption request was not submitted\n", 471 engine->name); 472 err = -ETIME; 473 } 474 475 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", 476 engine->name, 477 ce[0]->ring->tail, ce[0]->ring->emit, 478 ce[1]->ring->tail, ce[1]->ring->emit); 479 480 err_ce: 481 intel_engine_flush_submission(engine); 482 igt_spinner_end(&spin); 483 for (n = 0; n < ARRAY_SIZE(ce); n++) { 484 if (IS_ERR_OR_NULL(ce[n])) 485 break; 486 487 intel_context_unpin(ce[n]); 488 intel_context_put(ce[n]); 489 } 490 st_engine_heartbeat_enable(engine); 491 if (igt_live_test_end(&t)) 492 err = -EIO; 493 if (err) 494 break; 495 } 496 497 igt_spinner_fini(&spin); 498 return err; 499 } 500 501 static int live_pin_rewind(void *arg) 502 { 503 struct intel_gt *gt = arg; 504 struct intel_engine_cs *engine; 505 enum intel_engine_id id; 506 int err = 0; 507 508 /* 509 * We have to be careful not to trust intel_ring too much, for example 510 * ring->head is updated upon 
retire, which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		struct intel_ring *ring;
		struct igt_live_test t;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			break;
		}

		/* Keep the context awake while we play games */
		err = i915_active_acquire(&ce->active);
		if (err) {
			intel_context_unpin(ce);
			intel_context_put(ce);
			break;
		}
		ring = ce->ring;

		/* Poison the ring, and offset the next request from HEAD */
		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
		ring->emit = ring->size / 2;
		ring->tail = ring->emit;
		GEM_BUG_ON(ring->head);

		intel_context_unpin(ce);

		/* Submit a simple nop request */
		GEM_BUG_ON(intel_context_is_pinned(ce));
		rq = intel_context_create_request(ce);
		i915_active_release(&ce->active); /* e.g. async retire */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		GEM_BUG_ON(!rq->head);
		i915_request_add(rq);

		/* Expect not to hang! */
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
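	 *
	 * The sequence exercised below is roughly: stop the submission
	 * tasklet, park the guilty request on the engine's hold list with
	 * execlists_hold(), reset the engine, and only release it again
	 * via execlists_unhold() once the (simulated) capture is done.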
591 */ 592 593 if (!intel_has_reset_engine(gt)) 594 return 0; 595 596 if (igt_spinner_init(&spin, gt)) 597 return -ENOMEM; 598 599 for_each_engine(engine, gt, id) { 600 struct intel_context *ce; 601 struct i915_request *rq; 602 603 ce = intel_context_create(engine); 604 if (IS_ERR(ce)) { 605 err = PTR_ERR(ce); 606 break; 607 } 608 609 st_engine_heartbeat_disable(engine); 610 611 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 612 if (IS_ERR(rq)) { 613 err = PTR_ERR(rq); 614 goto out; 615 } 616 i915_request_add(rq); 617 618 if (!igt_wait_for_spinner(&spin, rq)) { 619 intel_gt_set_wedged(gt); 620 err = -ETIME; 621 goto out; 622 } 623 624 /* We have our request executing, now remove it and reset */ 625 626 if (test_and_set_bit(I915_RESET_ENGINE + id, 627 >->reset.flags)) { 628 intel_gt_set_wedged(gt); 629 err = -EBUSY; 630 goto out; 631 } 632 tasklet_disable(&engine->execlists.tasklet); 633 634 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 635 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 636 637 i915_request_get(rq); 638 execlists_hold(engine, rq); 639 GEM_BUG_ON(!i915_request_on_hold(rq)); 640 641 intel_engine_reset(engine, NULL); 642 GEM_BUG_ON(rq->fence.error != -EIO); 643 644 tasklet_enable(&engine->execlists.tasklet); 645 clear_and_wake_up_bit(I915_RESET_ENGINE + id, 646 >->reset.flags); 647 648 /* Check that we do not resubmit the held request */ 649 if (!i915_request_wait(rq, 0, HZ / 5)) { 650 pr_err("%s: on hold request completed!\n", 651 engine->name); 652 i915_request_put(rq); 653 err = -EIO; 654 goto out; 655 } 656 GEM_BUG_ON(!i915_request_on_hold(rq)); 657 658 /* But is resubmitted on release */ 659 execlists_unhold(engine, rq); 660 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 661 pr_err("%s: held request did not complete!\n", 662 engine->name); 663 intel_gt_set_wedged(gt); 664 err = -ETIME; 665 } 666 i915_request_put(rq); 667 668 out: 669 st_engine_heartbeat_enable(engine); 670 intel_context_put(ce); 671 if (err) 672 break; 673 } 674 675 igt_spinner_fini(&spin); 676 return err; 677 } 678 679 static const char *error_repr(int err) 680 { 681 return err ? "bad" : "good"; 682 } 683 684 static int live_error_interrupt(void *arg) 685 { 686 static const struct error_phase { 687 enum { GOOD = 0, BAD = -EIO } error[2]; 688 } phases[] = { 689 { { BAD, GOOD } }, 690 { { BAD, BAD } }, 691 { { BAD, GOOD } }, 692 { { GOOD, GOOD } }, /* sentinel */ 693 }; 694 struct intel_gt *gt = arg; 695 struct intel_engine_cs *engine; 696 enum intel_engine_id id; 697 698 /* 699 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning 700 * of invalid commands in user batches that will cause a GPU hang. 701 * This is a faster mechanism than using hangcheck/heartbeats, but 702 * only detects problems the HW knows about -- it will not warn when 703 * we kill the HW! 704 * 705 * To verify our detection and reset, we throw some invalid commands 706 * at the HW and wait for the interrupt. 
707 */ 708 709 if (!intel_has_reset_engine(gt)) 710 return 0; 711 712 for_each_engine(engine, gt, id) { 713 const struct error_phase *p; 714 int err = 0; 715 716 st_engine_heartbeat_disable(engine); 717 718 for (p = phases; p->error[0] != GOOD; p++) { 719 struct i915_request *client[ARRAY_SIZE(phases->error)]; 720 u32 *cs; 721 int i; 722 723 memset(client, 0, sizeof(*client)); 724 for (i = 0; i < ARRAY_SIZE(client); i++) { 725 struct intel_context *ce; 726 struct i915_request *rq; 727 728 ce = intel_context_create(engine); 729 if (IS_ERR(ce)) { 730 err = PTR_ERR(ce); 731 goto out; 732 } 733 734 rq = intel_context_create_request(ce); 735 intel_context_put(ce); 736 if (IS_ERR(rq)) { 737 err = PTR_ERR(rq); 738 goto out; 739 } 740 741 if (rq->engine->emit_init_breadcrumb) { 742 err = rq->engine->emit_init_breadcrumb(rq); 743 if (err) { 744 i915_request_add(rq); 745 goto out; 746 } 747 } 748 749 cs = intel_ring_begin(rq, 2); 750 if (IS_ERR(cs)) { 751 i915_request_add(rq); 752 err = PTR_ERR(cs); 753 goto out; 754 } 755 756 if (p->error[i]) { 757 *cs++ = 0xdeadbeef; 758 *cs++ = 0xdeadbeef; 759 } else { 760 *cs++ = MI_NOOP; 761 *cs++ = MI_NOOP; 762 } 763 764 client[i] = i915_request_get(rq); 765 i915_request_add(rq); 766 } 767 768 err = wait_for_submit(engine, client[0], HZ / 2); 769 if (err) { 770 pr_err("%s: first request did not start within time!\n", 771 engine->name); 772 err = -ETIME; 773 goto out; 774 } 775 776 for (i = 0; i < ARRAY_SIZE(client); i++) { 777 if (i915_request_wait(client[i], 0, HZ / 5) < 0) 778 pr_debug("%s: %s request incomplete!\n", 779 engine->name, 780 error_repr(p->error[i])); 781 782 if (!i915_request_started(client[i])) { 783 pr_err("%s: %s request not started!\n", 784 engine->name, 785 error_repr(p->error[i])); 786 err = -ETIME; 787 goto out; 788 } 789 790 /* Kick the tasklet to process the error */ 791 intel_engine_flush_submission(engine); 792 if (client[i]->fence.error != p->error[i]) { 793 pr_err("%s: %s request (%s) with wrong error code: %d\n", 794 engine->name, 795 error_repr(p->error[i]), 796 i915_request_completed(client[i]) ? 
"completed" : "running", 797 client[i]->fence.error); 798 err = -EINVAL; 799 goto out; 800 } 801 } 802 803 out: 804 for (i = 0; i < ARRAY_SIZE(client); i++) 805 if (client[i]) 806 i915_request_put(client[i]); 807 if (err) { 808 pr_err("%s: failed at phase[%zd] { %d, %d }\n", 809 engine->name, p - phases, 810 p->error[0], p->error[1]); 811 break; 812 } 813 } 814 815 st_engine_heartbeat_enable(engine); 816 if (err) { 817 intel_gt_set_wedged(gt); 818 return err; 819 } 820 } 821 822 return 0; 823 } 824 825 static int 826 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) 827 { 828 u32 *cs; 829 830 cs = intel_ring_begin(rq, 10); 831 if (IS_ERR(cs)) 832 return PTR_ERR(cs); 833 834 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 835 836 *cs++ = MI_SEMAPHORE_WAIT | 837 MI_SEMAPHORE_GLOBAL_GTT | 838 MI_SEMAPHORE_POLL | 839 MI_SEMAPHORE_SAD_NEQ_SDD; 840 *cs++ = 0; 841 *cs++ = i915_ggtt_offset(vma) + 4 * idx; 842 *cs++ = 0; 843 844 if (idx > 0) { 845 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 846 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 847 *cs++ = 0; 848 *cs++ = 1; 849 } else { 850 *cs++ = MI_NOOP; 851 *cs++ = MI_NOOP; 852 *cs++ = MI_NOOP; 853 *cs++ = MI_NOOP; 854 } 855 856 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 857 858 intel_ring_advance(rq, cs); 859 return 0; 860 } 861 862 static struct i915_request * 863 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) 864 { 865 struct intel_context *ce; 866 struct i915_request *rq; 867 int err; 868 869 ce = intel_context_create(engine); 870 if (IS_ERR(ce)) 871 return ERR_CAST(ce); 872 873 rq = intel_context_create_request(ce); 874 if (IS_ERR(rq)) 875 goto out_ce; 876 877 err = 0; 878 if (rq->engine->emit_init_breadcrumb) 879 err = rq->engine->emit_init_breadcrumb(rq); 880 if (err == 0) 881 err = emit_semaphore_chain(rq, vma, idx); 882 if (err == 0) 883 i915_request_get(rq); 884 i915_request_add(rq); 885 if (err) 886 rq = ERR_PTR(err); 887 888 out_ce: 889 intel_context_put(ce); 890 return rq; 891 } 892 893 static int 894 release_queue(struct intel_engine_cs *engine, 895 struct i915_vma *vma, 896 int idx, int prio) 897 { 898 struct i915_sched_attr attr = { 899 .priority = prio, 900 }; 901 struct i915_request *rq; 902 u32 *cs; 903 904 rq = intel_engine_create_kernel_request(engine); 905 if (IS_ERR(rq)) 906 return PTR_ERR(rq); 907 908 cs = intel_ring_begin(rq, 4); 909 if (IS_ERR(cs)) { 910 i915_request_add(rq); 911 return PTR_ERR(cs); 912 } 913 914 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 915 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 916 *cs++ = 0; 917 *cs++ = 1; 918 919 intel_ring_advance(rq, cs); 920 921 i915_request_get(rq); 922 i915_request_add(rq); 923 924 local_bh_disable(); 925 engine->schedule(rq, &attr); 926 local_bh_enable(); /* kick tasklet */ 927 928 i915_request_put(rq); 929 930 return 0; 931 } 932 933 static int 934 slice_semaphore_queue(struct intel_engine_cs *outer, 935 struct i915_vma *vma, 936 int count) 937 { 938 struct intel_engine_cs *engine; 939 struct i915_request *head; 940 enum intel_engine_id id; 941 int err, i, n = 0; 942 943 head = semaphore_queue(outer, vma, n++); 944 if (IS_ERR(head)) 945 return PTR_ERR(head); 946 947 for_each_engine(engine, outer->gt, id) { 948 for (i = 0; i < count; i++) { 949 struct i915_request *rq; 950 951 rq = semaphore_queue(engine, vma, n++); 952 if (IS_ERR(rq)) { 953 err = PTR_ERR(rq); 954 goto out; 955 } 956 957 i915_request_put(rq); 958 } 959 } 960 961 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); 962 if (err) 963 goto out; 964 
965 if (i915_request_wait(head, 0, 966 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { 967 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", 968 count, n); 969 GEM_TRACE_DUMP(); 970 intel_gt_set_wedged(outer->gt); 971 err = -EIO; 972 } 973 974 out: 975 i915_request_put(head); 976 return err; 977 } 978 979 static int live_timeslice_preempt(void *arg) 980 { 981 struct intel_gt *gt = arg; 982 struct drm_i915_gem_object *obj; 983 struct intel_engine_cs *engine; 984 enum intel_engine_id id; 985 struct i915_vma *vma; 986 void *vaddr; 987 int err = 0; 988 989 /* 990 * If a request takes too long, we would like to give other users 991 * a fair go on the GPU. In particular, users may create batches 992 * that wait upon external input, where that input may even be 993 * supplied by another GPU job. To avoid blocking forever, we 994 * need to preempt the current task and replace it with another 995 * ready task. 996 */ 997 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 998 return 0; 999 1000 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1001 if (IS_ERR(obj)) 1002 return PTR_ERR(obj); 1003 1004 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1005 if (IS_ERR(vma)) { 1006 err = PTR_ERR(vma); 1007 goto err_obj; 1008 } 1009 1010 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 1011 if (IS_ERR(vaddr)) { 1012 err = PTR_ERR(vaddr); 1013 goto err_obj; 1014 } 1015 1016 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1017 if (err) 1018 goto err_map; 1019 1020 err = i915_vma_sync(vma); 1021 if (err) 1022 goto err_pin; 1023 1024 for_each_engine(engine, gt, id) { 1025 if (!intel_engine_has_preemption(engine)) 1026 continue; 1027 1028 memset(vaddr, 0, PAGE_SIZE); 1029 1030 st_engine_heartbeat_disable(engine); 1031 err = slice_semaphore_queue(engine, vma, 5); 1032 st_engine_heartbeat_enable(engine); 1033 if (err) 1034 goto err_pin; 1035 1036 if (igt_flush_test(gt->i915)) { 1037 err = -EIO; 1038 goto err_pin; 1039 } 1040 } 1041 1042 err_pin: 1043 i915_vma_unpin(vma); 1044 err_map: 1045 i915_gem_object_unpin_map(obj); 1046 err_obj: 1047 i915_gem_object_put(obj); 1048 return err; 1049 } 1050 1051 static struct i915_request * 1052 create_rewinder(struct intel_context *ce, 1053 struct i915_request *wait, 1054 void *slot, int idx) 1055 { 1056 const u32 offset = 1057 i915_ggtt_offset(ce->engine->status_page.vma) + 1058 offset_in_page(slot); 1059 struct i915_request *rq; 1060 u32 *cs; 1061 int err; 1062 1063 rq = intel_context_create_request(ce); 1064 if (IS_ERR(rq)) 1065 return rq; 1066 1067 if (wait) { 1068 err = i915_request_await_dma_fence(rq, &wait->fence); 1069 if (err) 1070 goto err; 1071 } 1072 1073 cs = intel_ring_begin(rq, 14); 1074 if (IS_ERR(cs)) { 1075 err = PTR_ERR(cs); 1076 goto err; 1077 } 1078 1079 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1080 *cs++ = MI_NOOP; 1081 1082 *cs++ = MI_SEMAPHORE_WAIT | 1083 MI_SEMAPHORE_GLOBAL_GTT | 1084 MI_SEMAPHORE_POLL | 1085 MI_SEMAPHORE_SAD_GTE_SDD; 1086 *cs++ = idx; 1087 *cs++ = offset; 1088 *cs++ = 0; 1089 1090 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 1091 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); 1092 *cs++ = offset + idx * sizeof(u32); 1093 *cs++ = 0; 1094 1095 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1096 *cs++ = offset; 1097 *cs++ = 0; 1098 *cs++ = idx + 1; 1099 1100 intel_ring_advance(rq, cs); 1101 1102 rq->sched.attr.priority = I915_PRIORITY_MASK; 1103 err = 0; 1104 err: 1105 i915_request_get(rq); 1106 i915_request_add(rq); 1107 if (err) { 1108 
i915_request_put(rq); 1109 return ERR_PTR(err); 1110 } 1111 1112 return rq; 1113 } 1114 1115 static int live_timeslice_rewind(void *arg) 1116 { 1117 struct intel_gt *gt = arg; 1118 struct intel_engine_cs *engine; 1119 enum intel_engine_id id; 1120 1121 /* 1122 * The usual presumption on timeslice expiration is that we replace 1123 * the active context with another. However, given a chain of 1124 * dependencies we may end up with replacing the context with itself, 1125 * but only a few of those requests, forcing us to rewind the 1126 * RING_TAIL of the original request. 1127 */ 1128 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1129 return 0; 1130 1131 for_each_engine(engine, gt, id) { 1132 enum { A1, A2, B1 }; 1133 enum { X = 1, Z, Y }; 1134 struct i915_request *rq[3] = {}; 1135 struct intel_context *ce; 1136 unsigned long timeslice; 1137 int i, err = 0; 1138 u32 *slot; 1139 1140 if (!intel_engine_has_timeslices(engine)) 1141 continue; 1142 1143 /* 1144 * A:rq1 -- semaphore wait, timestamp X 1145 * A:rq2 -- write timestamp Y 1146 * 1147 * B:rq1 [await A:rq1] -- write timestamp Z 1148 * 1149 * Force timeslice, release semaphore. 1150 * 1151 * Expect execution/evaluation order XZY 1152 */ 1153 1154 st_engine_heartbeat_disable(engine); 1155 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1156 1157 slot = memset32(engine->status_page.addr + 1000, 0, 4); 1158 1159 ce = intel_context_create(engine); 1160 if (IS_ERR(ce)) { 1161 err = PTR_ERR(ce); 1162 goto err; 1163 } 1164 1165 rq[A1] = create_rewinder(ce, NULL, slot, X); 1166 if (IS_ERR(rq[A1])) { 1167 intel_context_put(ce); 1168 goto err; 1169 } 1170 1171 rq[A2] = create_rewinder(ce, NULL, slot, Y); 1172 intel_context_put(ce); 1173 if (IS_ERR(rq[A2])) 1174 goto err; 1175 1176 err = wait_for_submit(engine, rq[A2], HZ / 2); 1177 if (err) { 1178 pr_err("%s: failed to submit first context\n", 1179 engine->name); 1180 goto err; 1181 } 1182 1183 ce = intel_context_create(engine); 1184 if (IS_ERR(ce)) { 1185 err = PTR_ERR(ce); 1186 goto err; 1187 } 1188 1189 rq[B1] = create_rewinder(ce, rq[A1], slot, Z); 1190 intel_context_put(ce); 1191 if (IS_ERR(rq[2])) 1192 goto err; 1193 1194 err = wait_for_submit(engine, rq[B1], HZ / 2); 1195 if (err) { 1196 pr_err("%s: failed to submit second context\n", 1197 engine->name); 1198 goto err; 1199 } 1200 1201 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ 1202 ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); 1203 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */ 1204 /* Wait for the timeslice to kick in */ 1205 del_timer(&engine->execlists.timer); 1206 tasklet_hi_schedule(&engine->execlists.tasklet); 1207 intel_engine_flush_submission(engine); 1208 } 1209 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ 1210 GEM_BUG_ON(!i915_request_is_active(rq[A1])); 1211 GEM_BUG_ON(!i915_request_is_active(rq[B1])); 1212 GEM_BUG_ON(i915_request_is_active(rq[A2])); 1213 1214 /* Release the hounds! 
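		 *
		 * Writing 1 to slot[0] releases the first MI_SEMAPHORE_WAIT
		 * (SAD_GTE_SDD) emitted by create_rewinder(); each request
		 * then records RING_TIMESTAMP into slot[idx] and bumps
		 * slot[0] to idx + 1, releasing the next waiter in turn.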
*/ 1215 slot[0] = 1; 1216 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */ 1217 1218 for (i = 1; i <= 3; i++) { 1219 unsigned long timeout = jiffies + HZ / 2; 1220 1221 while (!READ_ONCE(slot[i]) && 1222 time_before(jiffies, timeout)) 1223 ; 1224 1225 if (!time_before(jiffies, timeout)) { 1226 pr_err("%s: rq[%d] timed out\n", 1227 engine->name, i - 1); 1228 err = -ETIME; 1229 goto err; 1230 } 1231 1232 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]); 1233 } 1234 1235 /* XZY: XZ < XY */ 1236 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) { 1237 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n", 1238 engine->name, 1239 slot[Z] - slot[X], 1240 slot[Y] - slot[X]); 1241 err = -EINVAL; 1242 } 1243 1244 err: 1245 memset32(&slot[0], -1, 4); 1246 wmb(); 1247 1248 engine->props.timeslice_duration_ms = timeslice; 1249 st_engine_heartbeat_enable(engine); 1250 for (i = 0; i < 3; i++) 1251 i915_request_put(rq[i]); 1252 if (igt_flush_test(gt->i915)) 1253 err = -EIO; 1254 if (err) 1255 return err; 1256 } 1257 1258 return 0; 1259 } 1260 1261 static struct i915_request *nop_request(struct intel_engine_cs *engine) 1262 { 1263 struct i915_request *rq; 1264 1265 rq = intel_engine_create_kernel_request(engine); 1266 if (IS_ERR(rq)) 1267 return rq; 1268 1269 i915_request_get(rq); 1270 i915_request_add(rq); 1271 1272 return rq; 1273 } 1274 1275 static long slice_timeout(struct intel_engine_cs *engine) 1276 { 1277 long timeout; 1278 1279 /* Enough time for a timeslice to kick in, and kick out */ 1280 timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine)); 1281 1282 /* Enough time for the nop request to complete */ 1283 timeout += HZ / 5; 1284 1285 return timeout + 1; 1286 } 1287 1288 static int live_timeslice_queue(void *arg) 1289 { 1290 struct intel_gt *gt = arg; 1291 struct drm_i915_gem_object *obj; 1292 struct intel_engine_cs *engine; 1293 enum intel_engine_id id; 1294 struct i915_vma *vma; 1295 void *vaddr; 1296 int err = 0; 1297 1298 /* 1299 * Make sure that even if ELSP[0] and ELSP[1] are filled with 1300 * timeslicing between them disabled, we *do* enable timeslicing 1301 * if the queue demands it. (Normally, we do not submit if 1302 * ELSP[1] is already occupied, so must rely on timeslicing to 1303 * eject ELSP[0] in favour of the queue.) 
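	 *
	 * Concretely: ELSP[0] holds a semaphore-wait at maximum priority,
	 * ELSP[1] a plain nop, and neither forces a timeslice on its own;
	 * only once the release is queued at a priority equal to ELSP[0]
	 * must timeslicing kick in so the semaphore can be signalled.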
1304 */ 1305 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1306 return 0; 1307 1308 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1309 if (IS_ERR(obj)) 1310 return PTR_ERR(obj); 1311 1312 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1313 if (IS_ERR(vma)) { 1314 err = PTR_ERR(vma); 1315 goto err_obj; 1316 } 1317 1318 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 1319 if (IS_ERR(vaddr)) { 1320 err = PTR_ERR(vaddr); 1321 goto err_obj; 1322 } 1323 1324 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1325 if (err) 1326 goto err_map; 1327 1328 err = i915_vma_sync(vma); 1329 if (err) 1330 goto err_pin; 1331 1332 for_each_engine(engine, gt, id) { 1333 struct i915_sched_attr attr = { 1334 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 1335 }; 1336 struct i915_request *rq, *nop; 1337 1338 if (!intel_engine_has_preemption(engine)) 1339 continue; 1340 1341 st_engine_heartbeat_disable(engine); 1342 memset(vaddr, 0, PAGE_SIZE); 1343 1344 /* ELSP[0]: semaphore wait */ 1345 rq = semaphore_queue(engine, vma, 0); 1346 if (IS_ERR(rq)) { 1347 err = PTR_ERR(rq); 1348 goto err_heartbeat; 1349 } 1350 engine->schedule(rq, &attr); 1351 err = wait_for_submit(engine, rq, HZ / 2); 1352 if (err) { 1353 pr_err("%s: Timed out trying to submit semaphores\n", 1354 engine->name); 1355 goto err_rq; 1356 } 1357 1358 /* ELSP[1]: nop request */ 1359 nop = nop_request(engine); 1360 if (IS_ERR(nop)) { 1361 err = PTR_ERR(nop); 1362 goto err_rq; 1363 } 1364 err = wait_for_submit(engine, nop, HZ / 2); 1365 i915_request_put(nop); 1366 if (err) { 1367 pr_err("%s: Timed out trying to submit nop\n", 1368 engine->name); 1369 goto err_rq; 1370 } 1371 1372 GEM_BUG_ON(i915_request_completed(rq)); 1373 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 1374 1375 /* Queue: semaphore signal, matching priority as semaphore */ 1376 err = release_queue(engine, vma, 1, effective_prio(rq)); 1377 if (err) 1378 goto err_rq; 1379 1380 /* Wait until we ack the release_queue and start timeslicing */ 1381 do { 1382 cond_resched(); 1383 intel_engine_flush_submission(engine); 1384 } while (READ_ONCE(engine->execlists.pending[0])); 1385 1386 /* Timeslice every jiffy, so within 2 we should signal */ 1387 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) { 1388 struct drm_printer p = 1389 drm_info_printer(gt->i915->drm.dev); 1390 1391 pr_err("%s: Failed to timeslice into queue\n", 1392 engine->name); 1393 intel_engine_dump(engine, &p, 1394 "%s\n", engine->name); 1395 1396 memset(vaddr, 0xff, PAGE_SIZE); 1397 err = -EIO; 1398 } 1399 err_rq: 1400 i915_request_put(rq); 1401 err_heartbeat: 1402 st_engine_heartbeat_enable(engine); 1403 if (err) 1404 break; 1405 } 1406 1407 err_pin: 1408 i915_vma_unpin(vma); 1409 err_map: 1410 i915_gem_object_unpin_map(obj); 1411 err_obj: 1412 i915_gem_object_put(obj); 1413 return err; 1414 } 1415 1416 static int live_timeslice_nopreempt(void *arg) 1417 { 1418 struct intel_gt *gt = arg; 1419 struct intel_engine_cs *engine; 1420 enum intel_engine_id id; 1421 struct igt_spinner spin; 1422 int err = 0; 1423 1424 /* 1425 * We should not timeslice into a request that is marked with 1426 * I915_REQUEST_NOPREEMPT. 
1427 */ 1428 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1429 return 0; 1430 1431 if (igt_spinner_init(&spin, gt)) 1432 return -ENOMEM; 1433 1434 for_each_engine(engine, gt, id) { 1435 struct intel_context *ce; 1436 struct i915_request *rq; 1437 unsigned long timeslice; 1438 1439 if (!intel_engine_has_preemption(engine)) 1440 continue; 1441 1442 ce = intel_context_create(engine); 1443 if (IS_ERR(ce)) { 1444 err = PTR_ERR(ce); 1445 break; 1446 } 1447 1448 st_engine_heartbeat_disable(engine); 1449 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1450 1451 /* Create an unpreemptible spinner */ 1452 1453 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 1454 intel_context_put(ce); 1455 if (IS_ERR(rq)) { 1456 err = PTR_ERR(rq); 1457 goto out_heartbeat; 1458 } 1459 1460 i915_request_get(rq); 1461 i915_request_add(rq); 1462 1463 if (!igt_wait_for_spinner(&spin, rq)) { 1464 i915_request_put(rq); 1465 err = -ETIME; 1466 goto out_spin; 1467 } 1468 1469 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); 1470 i915_request_put(rq); 1471 1472 /* Followed by a maximum priority barrier (heartbeat) */ 1473 1474 ce = intel_context_create(engine); 1475 if (IS_ERR(ce)) { 1476 err = PTR_ERR(ce); 1477 goto out_spin; 1478 } 1479 1480 rq = intel_context_create_request(ce); 1481 intel_context_put(ce); 1482 if (IS_ERR(rq)) { 1483 err = PTR_ERR(rq); 1484 goto out_spin; 1485 } 1486 1487 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 1488 i915_request_get(rq); 1489 i915_request_add(rq); 1490 1491 /* 1492 * Wait until the barrier is in ELSP, and we know timeslicing 1493 * will have been activated. 1494 */ 1495 if (wait_for_submit(engine, rq, HZ / 2)) { 1496 i915_request_put(rq); 1497 err = -ETIME; 1498 goto out_spin; 1499 } 1500 1501 /* 1502 * Since the ELSP[0] request is unpreemptible, it should not 1503 * allow the maximum priority barrier through. Wait long 1504 * enough to see if it is timesliced in by mistake. 1505 */ 1506 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) { 1507 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", 1508 engine->name); 1509 err = -EINVAL; 1510 } 1511 i915_request_put(rq); 1512 1513 out_spin: 1514 igt_spinner_end(&spin); 1515 out_heartbeat: 1516 xchg(&engine->props.timeslice_duration_ms, timeslice); 1517 st_engine_heartbeat_enable(engine); 1518 if (err) 1519 break; 1520 1521 if (igt_flush_test(gt->i915)) { 1522 err = -EIO; 1523 break; 1524 } 1525 } 1526 1527 igt_spinner_fini(&spin); 1528 return err; 1529 } 1530 1531 static int live_busywait_preempt(void *arg) 1532 { 1533 struct intel_gt *gt = arg; 1534 struct i915_gem_context *ctx_hi, *ctx_lo; 1535 struct intel_engine_cs *engine; 1536 struct drm_i915_gem_object *obj; 1537 struct i915_vma *vma; 1538 enum intel_engine_id id; 1539 int err = -ENOMEM; 1540 u32 *map; 1541 1542 /* 1543 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can 1544 * preempt the busywaits used to synchronise between rings. 
1545 */ 1546 1547 ctx_hi = kernel_context(gt->i915); 1548 if (!ctx_hi) 1549 return -ENOMEM; 1550 ctx_hi->sched.priority = 1551 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1552 1553 ctx_lo = kernel_context(gt->i915); 1554 if (!ctx_lo) 1555 goto err_ctx_hi; 1556 ctx_lo->sched.priority = 1557 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1558 1559 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1560 if (IS_ERR(obj)) { 1561 err = PTR_ERR(obj); 1562 goto err_ctx_lo; 1563 } 1564 1565 map = i915_gem_object_pin_map(obj, I915_MAP_WC); 1566 if (IS_ERR(map)) { 1567 err = PTR_ERR(map); 1568 goto err_obj; 1569 } 1570 1571 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1572 if (IS_ERR(vma)) { 1573 err = PTR_ERR(vma); 1574 goto err_map; 1575 } 1576 1577 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1578 if (err) 1579 goto err_map; 1580 1581 err = i915_vma_sync(vma); 1582 if (err) 1583 goto err_vma; 1584 1585 for_each_engine(engine, gt, id) { 1586 struct i915_request *lo, *hi; 1587 struct igt_live_test t; 1588 u32 *cs; 1589 1590 if (!intel_engine_has_preemption(engine)) 1591 continue; 1592 1593 if (!intel_engine_can_store_dword(engine)) 1594 continue; 1595 1596 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1597 err = -EIO; 1598 goto err_vma; 1599 } 1600 1601 /* 1602 * We create two requests. The low priority request 1603 * busywaits on a semaphore (inside the ringbuffer where 1604 * is should be preemptible) and the high priority requests 1605 * uses a MI_STORE_DWORD_IMM to update the semaphore value 1606 * allowing the first request to complete. If preemption 1607 * fails, we hang instead. 1608 */ 1609 1610 lo = igt_request_alloc(ctx_lo, engine); 1611 if (IS_ERR(lo)) { 1612 err = PTR_ERR(lo); 1613 goto err_vma; 1614 } 1615 1616 cs = intel_ring_begin(lo, 8); 1617 if (IS_ERR(cs)) { 1618 err = PTR_ERR(cs); 1619 i915_request_add(lo); 1620 goto err_vma; 1621 } 1622 1623 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1624 *cs++ = i915_ggtt_offset(vma); 1625 *cs++ = 0; 1626 *cs++ = 1; 1627 1628 /* XXX Do we need a flush + invalidate here? 
*/ 1629 1630 *cs++ = MI_SEMAPHORE_WAIT | 1631 MI_SEMAPHORE_GLOBAL_GTT | 1632 MI_SEMAPHORE_POLL | 1633 MI_SEMAPHORE_SAD_EQ_SDD; 1634 *cs++ = 0; 1635 *cs++ = i915_ggtt_offset(vma); 1636 *cs++ = 0; 1637 1638 intel_ring_advance(lo, cs); 1639 1640 i915_request_get(lo); 1641 i915_request_add(lo); 1642 1643 if (wait_for(READ_ONCE(*map), 10)) { 1644 i915_request_put(lo); 1645 err = -ETIMEDOUT; 1646 goto err_vma; 1647 } 1648 1649 /* Low priority request should be busywaiting now */ 1650 if (i915_request_wait(lo, 0, 1) != -ETIME) { 1651 i915_request_put(lo); 1652 pr_err("%s: Busywaiting request did not!\n", 1653 engine->name); 1654 err = -EIO; 1655 goto err_vma; 1656 } 1657 1658 hi = igt_request_alloc(ctx_hi, engine); 1659 if (IS_ERR(hi)) { 1660 err = PTR_ERR(hi); 1661 i915_request_put(lo); 1662 goto err_vma; 1663 } 1664 1665 cs = intel_ring_begin(hi, 4); 1666 if (IS_ERR(cs)) { 1667 err = PTR_ERR(cs); 1668 i915_request_add(hi); 1669 i915_request_put(lo); 1670 goto err_vma; 1671 } 1672 1673 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1674 *cs++ = i915_ggtt_offset(vma); 1675 *cs++ = 0; 1676 *cs++ = 0; 1677 1678 intel_ring_advance(hi, cs); 1679 i915_request_add(hi); 1680 1681 if (i915_request_wait(lo, 0, HZ / 5) < 0) { 1682 struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1683 1684 pr_err("%s: Failed to preempt semaphore busywait!\n", 1685 engine->name); 1686 1687 intel_engine_dump(engine, &p, "%s\n", engine->name); 1688 GEM_TRACE_DUMP(); 1689 1690 i915_request_put(lo); 1691 intel_gt_set_wedged(gt); 1692 err = -EIO; 1693 goto err_vma; 1694 } 1695 GEM_BUG_ON(READ_ONCE(*map)); 1696 i915_request_put(lo); 1697 1698 if (igt_live_test_end(&t)) { 1699 err = -EIO; 1700 goto err_vma; 1701 } 1702 } 1703 1704 err = 0; 1705 err_vma: 1706 i915_vma_unpin(vma); 1707 err_map: 1708 i915_gem_object_unpin_map(obj); 1709 err_obj: 1710 i915_gem_object_put(obj); 1711 err_ctx_lo: 1712 kernel_context_close(ctx_lo); 1713 err_ctx_hi: 1714 kernel_context_close(ctx_hi); 1715 return err; 1716 } 1717 1718 static struct i915_request * 1719 spinner_create_request(struct igt_spinner *spin, 1720 struct i915_gem_context *ctx, 1721 struct intel_engine_cs *engine, 1722 u32 arb) 1723 { 1724 struct intel_context *ce; 1725 struct i915_request *rq; 1726 1727 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 1728 if (IS_ERR(ce)) 1729 return ERR_CAST(ce); 1730 1731 rq = igt_spinner_create_request(spin, ce, arb); 1732 intel_context_put(ce); 1733 return rq; 1734 } 1735 1736 static int live_preempt(void *arg) 1737 { 1738 struct intel_gt *gt = arg; 1739 struct i915_gem_context *ctx_hi, *ctx_lo; 1740 struct igt_spinner spin_hi, spin_lo; 1741 struct intel_engine_cs *engine; 1742 enum intel_engine_id id; 1743 int err = -ENOMEM; 1744 1745 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1746 return 0; 1747 1748 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) 1749 pr_err("Logical preemption supported, but not exposed\n"); 1750 1751 if (igt_spinner_init(&spin_hi, gt)) 1752 return -ENOMEM; 1753 1754 if (igt_spinner_init(&spin_lo, gt)) 1755 goto err_spin_hi; 1756 1757 ctx_hi = kernel_context(gt->i915); 1758 if (!ctx_hi) 1759 goto err_spin_lo; 1760 ctx_hi->sched.priority = 1761 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1762 1763 ctx_lo = kernel_context(gt->i915); 1764 if (!ctx_lo) 1765 goto err_ctx_hi; 1766 ctx_lo->sched.priority = 1767 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1768 1769 for_each_engine(engine, gt, id) { 1770 struct igt_live_test t; 1771 struct i915_request *rq; 1772 1773 if 
(!intel_engine_has_preemption(engine)) 1774 continue; 1775 1776 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1777 err = -EIO; 1778 goto err_ctx_lo; 1779 } 1780 1781 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1782 MI_ARB_CHECK); 1783 if (IS_ERR(rq)) { 1784 err = PTR_ERR(rq); 1785 goto err_ctx_lo; 1786 } 1787 1788 i915_request_add(rq); 1789 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1790 GEM_TRACE("lo spinner failed to start\n"); 1791 GEM_TRACE_DUMP(); 1792 intel_gt_set_wedged(gt); 1793 err = -EIO; 1794 goto err_ctx_lo; 1795 } 1796 1797 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1798 MI_ARB_CHECK); 1799 if (IS_ERR(rq)) { 1800 igt_spinner_end(&spin_lo); 1801 err = PTR_ERR(rq); 1802 goto err_ctx_lo; 1803 } 1804 1805 i915_request_add(rq); 1806 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1807 GEM_TRACE("hi spinner failed to start\n"); 1808 GEM_TRACE_DUMP(); 1809 intel_gt_set_wedged(gt); 1810 err = -EIO; 1811 goto err_ctx_lo; 1812 } 1813 1814 igt_spinner_end(&spin_hi); 1815 igt_spinner_end(&spin_lo); 1816 1817 if (igt_live_test_end(&t)) { 1818 err = -EIO; 1819 goto err_ctx_lo; 1820 } 1821 } 1822 1823 err = 0; 1824 err_ctx_lo: 1825 kernel_context_close(ctx_lo); 1826 err_ctx_hi: 1827 kernel_context_close(ctx_hi); 1828 err_spin_lo: 1829 igt_spinner_fini(&spin_lo); 1830 err_spin_hi: 1831 igt_spinner_fini(&spin_hi); 1832 return err; 1833 } 1834 1835 static int live_late_preempt(void *arg) 1836 { 1837 struct intel_gt *gt = arg; 1838 struct i915_gem_context *ctx_hi, *ctx_lo; 1839 struct igt_spinner spin_hi, spin_lo; 1840 struct intel_engine_cs *engine; 1841 struct i915_sched_attr attr = {}; 1842 enum intel_engine_id id; 1843 int err = -ENOMEM; 1844 1845 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1846 return 0; 1847 1848 if (igt_spinner_init(&spin_hi, gt)) 1849 return -ENOMEM; 1850 1851 if (igt_spinner_init(&spin_lo, gt)) 1852 goto err_spin_hi; 1853 1854 ctx_hi = kernel_context(gt->i915); 1855 if (!ctx_hi) 1856 goto err_spin_lo; 1857 1858 ctx_lo = kernel_context(gt->i915); 1859 if (!ctx_lo) 1860 goto err_ctx_hi; 1861 1862 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
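	 *
	 * (Both spinners therefore start out at low/default priority; the
	 * maximum priority is only applied afterwards via
	 * engine->schedule(), which is the "late" part of this test.)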
*/ 1863 ctx_lo->sched.priority = I915_USER_PRIORITY(1); 1864 1865 for_each_engine(engine, gt, id) { 1866 struct igt_live_test t; 1867 struct i915_request *rq; 1868 1869 if (!intel_engine_has_preemption(engine)) 1870 continue; 1871 1872 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1873 err = -EIO; 1874 goto err_ctx_lo; 1875 } 1876 1877 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1878 MI_ARB_CHECK); 1879 if (IS_ERR(rq)) { 1880 err = PTR_ERR(rq); 1881 goto err_ctx_lo; 1882 } 1883 1884 i915_request_add(rq); 1885 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1886 pr_err("First context failed to start\n"); 1887 goto err_wedged; 1888 } 1889 1890 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1891 MI_NOOP); 1892 if (IS_ERR(rq)) { 1893 igt_spinner_end(&spin_lo); 1894 err = PTR_ERR(rq); 1895 goto err_ctx_lo; 1896 } 1897 1898 i915_request_add(rq); 1899 if (igt_wait_for_spinner(&spin_hi, rq)) { 1900 pr_err("Second context overtook first?\n"); 1901 goto err_wedged; 1902 } 1903 1904 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1905 engine->schedule(rq, &attr); 1906 1907 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1908 pr_err("High priority context failed to preempt the low priority context\n"); 1909 GEM_TRACE_DUMP(); 1910 goto err_wedged; 1911 } 1912 1913 igt_spinner_end(&spin_hi); 1914 igt_spinner_end(&spin_lo); 1915 1916 if (igt_live_test_end(&t)) { 1917 err = -EIO; 1918 goto err_ctx_lo; 1919 } 1920 } 1921 1922 err = 0; 1923 err_ctx_lo: 1924 kernel_context_close(ctx_lo); 1925 err_ctx_hi: 1926 kernel_context_close(ctx_hi); 1927 err_spin_lo: 1928 igt_spinner_fini(&spin_lo); 1929 err_spin_hi: 1930 igt_spinner_fini(&spin_hi); 1931 return err; 1932 1933 err_wedged: 1934 igt_spinner_end(&spin_hi); 1935 igt_spinner_end(&spin_lo); 1936 intel_gt_set_wedged(gt); 1937 err = -EIO; 1938 goto err_ctx_lo; 1939 } 1940 1941 struct preempt_client { 1942 struct igt_spinner spin; 1943 struct i915_gem_context *ctx; 1944 }; 1945 1946 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) 1947 { 1948 c->ctx = kernel_context(gt->i915); 1949 if (!c->ctx) 1950 return -ENOMEM; 1951 1952 if (igt_spinner_init(&c->spin, gt)) 1953 goto err_ctx; 1954 1955 return 0; 1956 1957 err_ctx: 1958 kernel_context_close(c->ctx); 1959 return -ENOMEM; 1960 } 1961 1962 static void preempt_client_fini(struct preempt_client *c) 1963 { 1964 igt_spinner_fini(&c->spin); 1965 kernel_context_close(c->ctx); 1966 } 1967 1968 static int live_nopreempt(void *arg) 1969 { 1970 struct intel_gt *gt = arg; 1971 struct intel_engine_cs *engine; 1972 struct preempt_client a, b; 1973 enum intel_engine_id id; 1974 int err = -ENOMEM; 1975 1976 /* 1977 * Verify that we can disable preemption for an individual request 1978 * that may be being observed and not want to be interrupted. 1979 */ 1980 1981 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1982 return 0; 1983 1984 if (preempt_client_init(gt, &a)) 1985 return -ENOMEM; 1986 if (preempt_client_init(gt, &b)) 1987 goto err_client_a; 1988 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1989 1990 for_each_engine(engine, gt, id) { 1991 struct i915_request *rq_a, *rq_b; 1992 1993 if (!intel_engine_has_preemption(engine)) 1994 continue; 1995 1996 engine->execlists.preempt_hang.count = 0; 1997 1998 rq_a = spinner_create_request(&a.spin, 1999 a.ctx, engine, 2000 MI_ARB_CHECK); 2001 if (IS_ERR(rq_a)) { 2002 err = PTR_ERR(rq_a); 2003 goto err_client_b; 2004 } 2005 2006 /* Low priority client, but unpreemptable! 
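		 *
		 * (I915_FENCE_FLAG_NOPREEMPT, set just below, tells the
		 * scheduler never to inject a preemption while this request
		 * is executing, regardless of what is queued behind it.)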
*/ 2007 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); 2008 2009 i915_request_add(rq_a); 2010 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 2011 pr_err("First client failed to start\n"); 2012 goto err_wedged; 2013 } 2014 2015 rq_b = spinner_create_request(&b.spin, 2016 b.ctx, engine, 2017 MI_ARB_CHECK); 2018 if (IS_ERR(rq_b)) { 2019 err = PTR_ERR(rq_b); 2020 goto err_client_b; 2021 } 2022 2023 i915_request_add(rq_b); 2024 2025 /* B is much more important than A! (But A is unpreemptable.) */ 2026 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); 2027 2028 /* Wait long enough for preemption and timeslicing */ 2029 if (igt_wait_for_spinner(&b.spin, rq_b)) { 2030 pr_err("Second client started too early!\n"); 2031 goto err_wedged; 2032 } 2033 2034 igt_spinner_end(&a.spin); 2035 2036 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2037 pr_err("Second client failed to start\n"); 2038 goto err_wedged; 2039 } 2040 2041 igt_spinner_end(&b.spin); 2042 2043 if (engine->execlists.preempt_hang.count) { 2044 pr_err("Preemption recorded x%d; should have been suppressed!\n", 2045 engine->execlists.preempt_hang.count); 2046 err = -EINVAL; 2047 goto err_wedged; 2048 } 2049 2050 if (igt_flush_test(gt->i915)) 2051 goto err_wedged; 2052 } 2053 2054 err = 0; 2055 err_client_b: 2056 preempt_client_fini(&b); 2057 err_client_a: 2058 preempt_client_fini(&a); 2059 return err; 2060 2061 err_wedged: 2062 igt_spinner_end(&b.spin); 2063 igt_spinner_end(&a.spin); 2064 intel_gt_set_wedged(gt); 2065 err = -EIO; 2066 goto err_client_b; 2067 } 2068 2069 struct live_preempt_cancel { 2070 struct intel_engine_cs *engine; 2071 struct preempt_client a, b; 2072 }; 2073 2074 static int __cancel_active0(struct live_preempt_cancel *arg) 2075 { 2076 struct i915_request *rq; 2077 struct igt_live_test t; 2078 int err; 2079 2080 /* Preempt cancel of ELSP0 */ 2081 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2082 if (igt_live_test_begin(&t, arg->engine->i915, 2083 __func__, arg->engine->name)) 2084 return -EIO; 2085 2086 rq = spinner_create_request(&arg->a.spin, 2087 arg->a.ctx, arg->engine, 2088 MI_ARB_CHECK); 2089 if (IS_ERR(rq)) 2090 return PTR_ERR(rq); 2091 2092 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2093 i915_request_get(rq); 2094 i915_request_add(rq); 2095 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2096 err = -EIO; 2097 goto out; 2098 } 2099 2100 intel_context_set_banned(rq->context); 2101 err = intel_engine_pulse(arg->engine); 2102 if (err) 2103 goto out; 2104 2105 err = wait_for_reset(arg->engine, rq, HZ / 2); 2106 if (err) { 2107 pr_err("Cancelled inflight0 request did not reset\n"); 2108 goto out; 2109 } 2110 2111 out: 2112 i915_request_put(rq); 2113 if (igt_live_test_end(&t)) 2114 err = -EIO; 2115 return err; 2116 } 2117 2118 static int __cancel_active1(struct live_preempt_cancel *arg) 2119 { 2120 struct i915_request *rq[2] = {}; 2121 struct igt_live_test t; 2122 int err; 2123 2124 /* Preempt cancel of ELSP1 */ 2125 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2126 if (igt_live_test_begin(&t, arg->engine->i915, 2127 __func__, arg->engine->name)) 2128 return -EIO; 2129 2130 rq[0] = spinner_create_request(&arg->a.spin, 2131 arg->a.ctx, arg->engine, 2132 MI_NOOP); /* no preemption */ 2133 if (IS_ERR(rq[0])) 2134 return PTR_ERR(rq[0]); 2135 2136 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 2137 i915_request_get(rq[0]); 2138 i915_request_add(rq[0]); 2139 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 2140 err = -EIO; 2141 goto out; 2142 } 2143 2144 rq[1] = spinner_create_request(&arg->b.spin, 2145 
arg->b.ctx, arg->engine, 2146 MI_ARB_CHECK); 2147 if (IS_ERR(rq[1])) { 2148 err = PTR_ERR(rq[1]); 2149 goto out; 2150 } 2151 2152 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 2153 i915_request_get(rq[1]); 2154 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 2155 i915_request_add(rq[1]); 2156 if (err) 2157 goto out; 2158 2159 intel_context_set_banned(rq[1]->context); 2160 err = intel_engine_pulse(arg->engine); 2161 if (err) 2162 goto out; 2163 2164 igt_spinner_end(&arg->a.spin); 2165 err = wait_for_reset(arg->engine, rq[1], HZ / 2); 2166 if (err) 2167 goto out; 2168 2169 if (rq[0]->fence.error != 0) { 2170 pr_err("Normal inflight0 request did not complete\n"); 2171 err = -EINVAL; 2172 goto out; 2173 } 2174 2175 if (rq[1]->fence.error != -EIO) { 2176 pr_err("Cancelled inflight1 request did not report -EIO\n"); 2177 err = -EINVAL; 2178 goto out; 2179 } 2180 2181 out: 2182 i915_request_put(rq[1]); 2183 i915_request_put(rq[0]); 2184 if (igt_live_test_end(&t)) 2185 err = -EIO; 2186 return err; 2187 } 2188 2189 static int __cancel_queued(struct live_preempt_cancel *arg) 2190 { 2191 struct i915_request *rq[3] = {}; 2192 struct igt_live_test t; 2193 int err; 2194 2195 /* Full ELSP and one in the wings */ 2196 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2197 if (igt_live_test_begin(&t, arg->engine->i915, 2198 __func__, arg->engine->name)) 2199 return -EIO; 2200 2201 rq[0] = spinner_create_request(&arg->a.spin, 2202 arg->a.ctx, arg->engine, 2203 MI_ARB_CHECK); 2204 if (IS_ERR(rq[0])) 2205 return PTR_ERR(rq[0]); 2206 2207 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 2208 i915_request_get(rq[0]); 2209 i915_request_add(rq[0]); 2210 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 2211 err = -EIO; 2212 goto out; 2213 } 2214 2215 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); 2216 if (IS_ERR(rq[1])) { 2217 err = PTR_ERR(rq[1]); 2218 goto out; 2219 } 2220 2221 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 2222 i915_request_get(rq[1]); 2223 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 2224 i915_request_add(rq[1]); 2225 if (err) 2226 goto out; 2227 2228 rq[2] = spinner_create_request(&arg->b.spin, 2229 arg->a.ctx, arg->engine, 2230 MI_ARB_CHECK); 2231 if (IS_ERR(rq[2])) { 2232 err = PTR_ERR(rq[2]); 2233 goto out; 2234 } 2235 2236 i915_request_get(rq[2]); 2237 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); 2238 i915_request_add(rq[2]); 2239 if (err) 2240 goto out; 2241 2242 intel_context_set_banned(rq[2]->context); 2243 err = intel_engine_pulse(arg->engine); 2244 if (err) 2245 goto out; 2246 2247 err = wait_for_reset(arg->engine, rq[2], HZ / 2); 2248 if (err) 2249 goto out; 2250 2251 if (rq[0]->fence.error != -EIO) { 2252 pr_err("Cancelled inflight0 request did not report -EIO\n"); 2253 err = -EINVAL; 2254 goto out; 2255 } 2256 2257 if (rq[1]->fence.error != 0) { 2258 pr_err("Normal inflight1 request did not complete\n"); 2259 err = -EINVAL; 2260 goto out; 2261 } 2262 2263 if (rq[2]->fence.error != -EIO) { 2264 pr_err("Cancelled queued request did not report -EIO\n"); 2265 err = -EINVAL; 2266 goto out; 2267 } 2268 2269 out: 2270 i915_request_put(rq[2]); 2271 i915_request_put(rq[1]); 2272 i915_request_put(rq[0]); 2273 if (igt_live_test_end(&t)) 2274 err = -EIO; 2275 return err; 2276 } 2277 2278 static int __cancel_hostile(struct live_preempt_cancel *arg) 2279 { 2280 struct i915_request *rq; 2281 int err; 2282 2283 /* Preempt cancel non-preemptible spinner in ELSP0 */ 2284 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2285 return 0; 2286 2287 
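	/*
	 * Without a preempt timeout the banned, non-preemptible spinner
	 * would never be kicked off the engine, and without per-engine
	 * reset there would be no way to recover afterwards, hence both
	 * checks gating this subtest.
	 */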
if (!intel_has_reset_engine(arg->engine->gt)) 2288 return 0; 2289 2290 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2291 rq = spinner_create_request(&arg->a.spin, 2292 arg->a.ctx, arg->engine, 2293 MI_NOOP); /* preemption disabled */ 2294 if (IS_ERR(rq)) 2295 return PTR_ERR(rq); 2296 2297 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2298 i915_request_get(rq); 2299 i915_request_add(rq); 2300 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2301 err = -EIO; 2302 goto out; 2303 } 2304 2305 intel_context_set_banned(rq->context); 2306 err = intel_engine_pulse(arg->engine); /* force reset */ 2307 if (err) 2308 goto out; 2309 2310 err = wait_for_reset(arg->engine, rq, HZ / 2); 2311 if (err) { 2312 pr_err("Cancelled inflight0 request did not reset\n"); 2313 goto out; 2314 } 2315 2316 out: 2317 i915_request_put(rq); 2318 if (igt_flush_test(arg->engine->i915)) 2319 err = -EIO; 2320 return err; 2321 } 2322 2323 static int live_preempt_cancel(void *arg) 2324 { 2325 struct intel_gt *gt = arg; 2326 struct live_preempt_cancel data; 2327 enum intel_engine_id id; 2328 int err = -ENOMEM; 2329 2330 /* 2331 * To cancel an inflight context, we need to first remove it from the 2332 * GPU. That sounds like preemption! Plus a little bit of bookkeeping. 2333 */ 2334 2335 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2336 return 0; 2337 2338 if (preempt_client_init(gt, &data.a)) 2339 return -ENOMEM; 2340 if (preempt_client_init(gt, &data.b)) 2341 goto err_client_a; 2342 2343 for_each_engine(data.engine, gt, id) { 2344 if (!intel_engine_has_preemption(data.engine)) 2345 continue; 2346 2347 err = __cancel_active0(&data); 2348 if (err) 2349 goto err_wedged; 2350 2351 err = __cancel_active1(&data); 2352 if (err) 2353 goto err_wedged; 2354 2355 err = __cancel_queued(&data); 2356 if (err) 2357 goto err_wedged; 2358 2359 err = __cancel_hostile(&data); 2360 if (err) 2361 goto err_wedged; 2362 } 2363 2364 err = 0; 2365 err_client_b: 2366 preempt_client_fini(&data.b); 2367 err_client_a: 2368 preempt_client_fini(&data.a); 2369 return err; 2370 2371 err_wedged: 2372 GEM_TRACE_DUMP(); 2373 igt_spinner_end(&data.b.spin); 2374 igt_spinner_end(&data.a.spin); 2375 intel_gt_set_wedged(gt); 2376 goto err_client_b; 2377 } 2378 2379 static int live_suppress_self_preempt(void *arg) 2380 { 2381 struct intel_gt *gt = arg; 2382 struct intel_engine_cs *engine; 2383 struct i915_sched_attr attr = { 2384 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) 2385 }; 2386 struct preempt_client a, b; 2387 enum intel_engine_id id; 2388 int err = -ENOMEM; 2389 2390 /* 2391 * Verify that if a preemption request does not cause a change in 2392 * the current execution order, the preempt-to-idle injection is 2393 * skipped and that we do not accidentally apply it after the CS 2394 * completion event. 
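	 *
	 * (A "self" preemption would reload ELSP with exactly the contexts
	 * it is already running; issuing a preempt-to-idle cycle for that
	 * is pure overhead, and any such event is counted in
	 * execlists.preempt_hang.count below.)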
2395 */ 2396 2397 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2398 return 0; 2399 2400 if (intel_uc_uses_guc_submission(>->uc)) 2401 return 0; /* presume black blox */ 2402 2403 if (intel_vgpu_active(gt->i915)) 2404 return 0; /* GVT forces single port & request submission */ 2405 2406 if (preempt_client_init(gt, &a)) 2407 return -ENOMEM; 2408 if (preempt_client_init(gt, &b)) 2409 goto err_client_a; 2410 2411 for_each_engine(engine, gt, id) { 2412 struct i915_request *rq_a, *rq_b; 2413 int depth; 2414 2415 if (!intel_engine_has_preemption(engine)) 2416 continue; 2417 2418 if (igt_flush_test(gt->i915)) 2419 goto err_wedged; 2420 2421 st_engine_heartbeat_disable(engine); 2422 engine->execlists.preempt_hang.count = 0; 2423 2424 rq_a = spinner_create_request(&a.spin, 2425 a.ctx, engine, 2426 MI_NOOP); 2427 if (IS_ERR(rq_a)) { 2428 err = PTR_ERR(rq_a); 2429 st_engine_heartbeat_enable(engine); 2430 goto err_client_b; 2431 } 2432 2433 i915_request_add(rq_a); 2434 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 2435 pr_err("First client failed to start\n"); 2436 st_engine_heartbeat_enable(engine); 2437 goto err_wedged; 2438 } 2439 2440 /* Keep postponing the timer to avoid premature slicing */ 2441 mod_timer(&engine->execlists.timer, jiffies + HZ); 2442 for (depth = 0; depth < 8; depth++) { 2443 rq_b = spinner_create_request(&b.spin, 2444 b.ctx, engine, 2445 MI_NOOP); 2446 if (IS_ERR(rq_b)) { 2447 err = PTR_ERR(rq_b); 2448 st_engine_heartbeat_enable(engine); 2449 goto err_client_b; 2450 } 2451 i915_request_add(rq_b); 2452 2453 GEM_BUG_ON(i915_request_completed(rq_a)); 2454 engine->schedule(rq_a, &attr); 2455 igt_spinner_end(&a.spin); 2456 2457 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2458 pr_err("Second client failed to start\n"); 2459 st_engine_heartbeat_enable(engine); 2460 goto err_wedged; 2461 } 2462 2463 swap(a, b); 2464 rq_a = rq_b; 2465 } 2466 igt_spinner_end(&a.spin); 2467 2468 if (engine->execlists.preempt_hang.count) { 2469 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", 2470 engine->name, 2471 engine->execlists.preempt_hang.count, 2472 depth); 2473 st_engine_heartbeat_enable(engine); 2474 err = -EINVAL; 2475 goto err_client_b; 2476 } 2477 2478 st_engine_heartbeat_enable(engine); 2479 if (igt_flush_test(gt->i915)) 2480 goto err_wedged; 2481 } 2482 2483 err = 0; 2484 err_client_b: 2485 preempt_client_fini(&b); 2486 err_client_a: 2487 preempt_client_fini(&a); 2488 return err; 2489 2490 err_wedged: 2491 igt_spinner_end(&b.spin); 2492 igt_spinner_end(&a.spin); 2493 intel_gt_set_wedged(gt); 2494 err = -EIO; 2495 goto err_client_b; 2496 } 2497 2498 static int live_chain_preempt(void *arg) 2499 { 2500 struct intel_gt *gt = arg; 2501 struct intel_engine_cs *engine; 2502 struct preempt_client hi, lo; 2503 enum intel_engine_id id; 2504 int err = -ENOMEM; 2505 2506 /* 2507 * Build a chain AB...BA between two contexts (A, B) and request 2508 * preemption of the last request. It should then complete before 2509 * the previously submitted spinner in B. 
2510 */ 2511 2512 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2513 return 0; 2514 2515 if (preempt_client_init(gt, &hi)) 2516 return -ENOMEM; 2517 2518 if (preempt_client_init(gt, &lo)) 2519 goto err_client_hi; 2520 2521 for_each_engine(engine, gt, id) { 2522 struct i915_sched_attr attr = { 2523 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 2524 }; 2525 struct igt_live_test t; 2526 struct i915_request *rq; 2527 int ring_size, count, i; 2528 2529 if (!intel_engine_has_preemption(engine)) 2530 continue; 2531 2532 rq = spinner_create_request(&lo.spin, 2533 lo.ctx, engine, 2534 MI_ARB_CHECK); 2535 if (IS_ERR(rq)) 2536 goto err_wedged; 2537 2538 i915_request_get(rq); 2539 i915_request_add(rq); 2540 2541 ring_size = rq->wa_tail - rq->head; 2542 if (ring_size < 0) 2543 ring_size += rq->ring->size; 2544 ring_size = rq->ring->size / ring_size; 2545 pr_debug("%s(%s): Using maximum of %d requests\n", 2546 __func__, engine->name, ring_size); 2547 2548 igt_spinner_end(&lo.spin); 2549 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 2550 pr_err("Timed out waiting to flush %s\n", engine->name); 2551 i915_request_put(rq); 2552 goto err_wedged; 2553 } 2554 i915_request_put(rq); 2555 2556 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 2557 err = -EIO; 2558 goto err_wedged; 2559 } 2560 2561 for_each_prime_number_from(count, 1, ring_size) { 2562 rq = spinner_create_request(&hi.spin, 2563 hi.ctx, engine, 2564 MI_ARB_CHECK); 2565 if (IS_ERR(rq)) 2566 goto err_wedged; 2567 i915_request_add(rq); 2568 if (!igt_wait_for_spinner(&hi.spin, rq)) 2569 goto err_wedged; 2570 2571 rq = spinner_create_request(&lo.spin, 2572 lo.ctx, engine, 2573 MI_ARB_CHECK); 2574 if (IS_ERR(rq)) 2575 goto err_wedged; 2576 i915_request_add(rq); 2577 2578 for (i = 0; i < count; i++) { 2579 rq = igt_request_alloc(lo.ctx, engine); 2580 if (IS_ERR(rq)) 2581 goto err_wedged; 2582 i915_request_add(rq); 2583 } 2584 2585 rq = igt_request_alloc(hi.ctx, engine); 2586 if (IS_ERR(rq)) 2587 goto err_wedged; 2588 2589 i915_request_get(rq); 2590 i915_request_add(rq); 2591 engine->schedule(rq, &attr); 2592 2593 igt_spinner_end(&hi.spin); 2594 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2595 struct drm_printer p = 2596 drm_info_printer(gt->i915->drm.dev); 2597 2598 pr_err("Failed to preempt over chain of %d\n", 2599 count); 2600 intel_engine_dump(engine, &p, 2601 "%s\n", engine->name); 2602 i915_request_put(rq); 2603 goto err_wedged; 2604 } 2605 igt_spinner_end(&lo.spin); 2606 i915_request_put(rq); 2607 2608 rq = igt_request_alloc(lo.ctx, engine); 2609 if (IS_ERR(rq)) 2610 goto err_wedged; 2611 2612 i915_request_get(rq); 2613 i915_request_add(rq); 2614 2615 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2616 struct drm_printer p = 2617 drm_info_printer(gt->i915->drm.dev); 2618 2619 pr_err("Failed to flush low priority chain of %d requests\n", 2620 count); 2621 intel_engine_dump(engine, &p, 2622 "%s\n", engine->name); 2623 2624 i915_request_put(rq); 2625 goto err_wedged; 2626 } 2627 i915_request_put(rq); 2628 } 2629 2630 if (igt_live_test_end(&t)) { 2631 err = -EIO; 2632 goto err_wedged; 2633 } 2634 } 2635 2636 err = 0; 2637 err_client_lo: 2638 preempt_client_fini(&lo); 2639 err_client_hi: 2640 preempt_client_fini(&hi); 2641 return err; 2642 2643 err_wedged: 2644 igt_spinner_end(&hi.spin); 2645 igt_spinner_end(&lo.spin); 2646 intel_gt_set_wedged(gt); 2647 err = -EIO; 2648 goto err_client_lo; 2649 } 2650 2651 static int create_gang(struct intel_engine_cs *engine, 2652 struct i915_request **prev) 2653 { 2654 struct drm_i915_gem_object *obj; 
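	/*
	 * Each batch in the gang busywaits on the first dword of its own
	 * buffer and, once that is cleared, writes zero into the first
	 * dword of the previous (lower priority) batch before ending; so
	 * releasing the newest batch unwinds the whole chain in turn.
	 */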
2655 struct intel_context *ce; 2656 struct i915_request *rq; 2657 struct i915_vma *vma; 2658 u32 *cs; 2659 int err; 2660 2661 ce = intel_context_create(engine); 2662 if (IS_ERR(ce)) 2663 return PTR_ERR(ce); 2664 2665 obj = i915_gem_object_create_internal(engine->i915, 4096); 2666 if (IS_ERR(obj)) { 2667 err = PTR_ERR(obj); 2668 goto err_ce; 2669 } 2670 2671 vma = i915_vma_instance(obj, ce->vm, NULL); 2672 if (IS_ERR(vma)) { 2673 err = PTR_ERR(vma); 2674 goto err_obj; 2675 } 2676 2677 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2678 if (err) 2679 goto err_obj; 2680 2681 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 2682 if (IS_ERR(cs)) 2683 goto err_obj; 2684 2685 /* Semaphore target: spin until zero */ 2686 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2687 2688 *cs++ = MI_SEMAPHORE_WAIT | 2689 MI_SEMAPHORE_POLL | 2690 MI_SEMAPHORE_SAD_EQ_SDD; 2691 *cs++ = 0; 2692 *cs++ = lower_32_bits(vma->node.start); 2693 *cs++ = upper_32_bits(vma->node.start); 2694 2695 if (*prev) { 2696 u64 offset = (*prev)->batch->node.start; 2697 2698 /* Terminate the spinner in the next lower priority batch. */ 2699 *cs++ = MI_STORE_DWORD_IMM_GEN4; 2700 *cs++ = lower_32_bits(offset); 2701 *cs++ = upper_32_bits(offset); 2702 *cs++ = 0; 2703 } 2704 2705 *cs++ = MI_BATCH_BUFFER_END; 2706 i915_gem_object_flush_map(obj); 2707 i915_gem_object_unpin_map(obj); 2708 2709 rq = intel_context_create_request(ce); 2710 if (IS_ERR(rq)) 2711 goto err_obj; 2712 2713 rq->batch = i915_vma_get(vma); 2714 i915_request_get(rq); 2715 2716 i915_vma_lock(vma); 2717 err = i915_request_await_object(rq, vma->obj, false); 2718 if (!err) 2719 err = i915_vma_move_to_active(vma, rq, 0); 2720 if (!err) 2721 err = rq->engine->emit_bb_start(rq, 2722 vma->node.start, 2723 PAGE_SIZE, 0); 2724 i915_vma_unlock(vma); 2725 i915_request_add(rq); 2726 if (err) 2727 goto err_rq; 2728 2729 i915_gem_object_put(obj); 2730 intel_context_put(ce); 2731 2732 rq->client_link.next = &(*prev)->client_link; 2733 *prev = rq; 2734 return 0; 2735 2736 err_rq: 2737 i915_vma_put(rq->batch); 2738 i915_request_put(rq); 2739 err_obj: 2740 i915_gem_object_put(obj); 2741 err_ce: 2742 intel_context_put(ce); 2743 return err; 2744 } 2745 2746 static int __live_preempt_ring(struct intel_engine_cs *engine, 2747 struct igt_spinner *spin, 2748 int queue_sz, int ring_sz) 2749 { 2750 struct intel_context *ce[2] = {}; 2751 struct i915_request *rq; 2752 struct igt_live_test t; 2753 int err = 0; 2754 int n; 2755 2756 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name)) 2757 return -EIO; 2758 2759 for (n = 0; n < ARRAY_SIZE(ce); n++) { 2760 struct intel_context *tmp; 2761 2762 tmp = intel_context_create(engine); 2763 if (IS_ERR(tmp)) { 2764 err = PTR_ERR(tmp); 2765 goto err_ce; 2766 } 2767 2768 tmp->ring = __intel_context_ring_size(ring_sz); 2769 2770 err = intel_context_pin(tmp); 2771 if (err) { 2772 intel_context_put(tmp); 2773 goto err_ce; 2774 } 2775 2776 memset32(tmp->ring->vaddr, 2777 0xdeadbeef, /* trigger a hang if executed */ 2778 tmp->ring->vma->size / sizeof(u32)); 2779 2780 ce[n] = tmp; 2781 } 2782 2783 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK); 2784 if (IS_ERR(rq)) { 2785 err = PTR_ERR(rq); 2786 goto err_ce; 2787 } 2788 2789 i915_request_get(rq); 2790 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 2791 i915_request_add(rq); 2792 2793 if (!igt_wait_for_spinner(spin, rq)) { 2794 intel_gt_set_wedged(engine->gt); 2795 i915_request_put(rq); 2796 err = -ETIME; 2797 goto err_ce; 2798 } 2799 2800 /* Fill the ring, until we will cause a wrap */ 2801 n = 0; 2802 
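	/*
	 * Add nop requests to ce[0] until its tail has advanced more than
	 * queue_sz bytes beyond the spinner, so that preempting in favour
	 * of ce[1] forces the backend to roll back a large, and possibly
	 * wrapped, chunk of the ring.
	 */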
while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) { 2803 struct i915_request *tmp; 2804 2805 tmp = intel_context_create_request(ce[0]); 2806 if (IS_ERR(tmp)) { 2807 err = PTR_ERR(tmp); 2808 i915_request_put(rq); 2809 goto err_ce; 2810 } 2811 2812 i915_request_add(tmp); 2813 intel_engine_flush_submission(engine); 2814 n++; 2815 } 2816 intel_engine_flush_submission(engine); 2817 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", 2818 engine->name, queue_sz, n, 2819 ce[0]->ring->size, 2820 ce[0]->ring->tail, 2821 ce[0]->ring->emit, 2822 rq->tail); 2823 i915_request_put(rq); 2824 2825 /* Create a second request to preempt the first ring */ 2826 rq = intel_context_create_request(ce[1]); 2827 if (IS_ERR(rq)) { 2828 err = PTR_ERR(rq); 2829 goto err_ce; 2830 } 2831 2832 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 2833 i915_request_get(rq); 2834 i915_request_add(rq); 2835 2836 err = wait_for_submit(engine, rq, HZ / 2); 2837 i915_request_put(rq); 2838 if (err) { 2839 pr_err("%s: preemption request was not submited\n", 2840 engine->name); 2841 err = -ETIME; 2842 } 2843 2844 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", 2845 engine->name, 2846 ce[0]->ring->tail, ce[0]->ring->emit, 2847 ce[1]->ring->tail, ce[1]->ring->emit); 2848 2849 err_ce: 2850 intel_engine_flush_submission(engine); 2851 igt_spinner_end(spin); 2852 for (n = 0; n < ARRAY_SIZE(ce); n++) { 2853 if (IS_ERR_OR_NULL(ce[n])) 2854 break; 2855 2856 intel_context_unpin(ce[n]); 2857 intel_context_put(ce[n]); 2858 } 2859 if (igt_live_test_end(&t)) 2860 err = -EIO; 2861 return err; 2862 } 2863 2864 static int live_preempt_ring(void *arg) 2865 { 2866 struct intel_gt *gt = arg; 2867 struct intel_engine_cs *engine; 2868 struct igt_spinner spin; 2869 enum intel_engine_id id; 2870 int err = 0; 2871 2872 /* 2873 * Check that we rollback large chunks of a ring in order to do a 2874 * preemption event. Similar to live_unlite_ring, but looking at 2875 * ring size rather than the impact of intel_ring_direction(). 2876 */ 2877 2878 if (igt_spinner_init(&spin, gt)) 2879 return -ENOMEM; 2880 2881 for_each_engine(engine, gt, id) { 2882 int n; 2883 2884 if (!intel_engine_has_preemption(engine)) 2885 continue; 2886 2887 if (!intel_engine_can_store_dword(engine)) 2888 continue; 2889 2890 st_engine_heartbeat_disable(engine); 2891 2892 for (n = 0; n <= 3; n++) { 2893 err = __live_preempt_ring(engine, &spin, 2894 n * SZ_4K / 4, SZ_4K); 2895 if (err) 2896 break; 2897 } 2898 2899 st_engine_heartbeat_enable(engine); 2900 if (err) 2901 break; 2902 } 2903 2904 igt_spinner_fini(&spin); 2905 return err; 2906 } 2907 2908 static int live_preempt_gang(void *arg) 2909 { 2910 struct intel_gt *gt = arg; 2911 struct intel_engine_cs *engine; 2912 enum intel_engine_id id; 2913 2914 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2915 return 0; 2916 2917 /* 2918 * Build as long a chain of preempters as we can, with each 2919 * request higher priority than the last. Once we are ready, we release 2920 * the last batch which then precolates down the chain, each releasing 2921 * the next oldest in turn. The intent is to simply push as hard as we 2922 * can with the number of preemptions, trying to exceed narrow HW 2923 * limits. At a minimum, we insist that we can sort all the user 2924 * high priority levels into execution order. 
2925 */ 2926 2927 for_each_engine(engine, gt, id) { 2928 struct i915_request *rq = NULL; 2929 struct igt_live_test t; 2930 IGT_TIMEOUT(end_time); 2931 int prio = 0; 2932 int err = 0; 2933 u32 *cs; 2934 2935 if (!intel_engine_has_preemption(engine)) 2936 continue; 2937 2938 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2939 return -EIO; 2940 2941 do { 2942 struct i915_sched_attr attr = { 2943 .priority = I915_USER_PRIORITY(prio++), 2944 }; 2945 2946 err = create_gang(engine, &rq); 2947 if (err) 2948 break; 2949 2950 /* Submit each spinner at increasing priority */ 2951 engine->schedule(rq, &attr); 2952 } while (prio <= I915_PRIORITY_MAX && 2953 !__igt_timeout(end_time, NULL)); 2954 pr_debug("%s: Preempt chain of %d requests\n", 2955 engine->name, prio); 2956 2957 /* 2958 * Such that the last spinner is the highest priority and 2959 * should execute first. When that spinner completes, 2960 * it will terminate the next lowest spinner until there 2961 * are no more spinners and the gang is complete. 2962 */ 2963 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); 2964 if (!IS_ERR(cs)) { 2965 *cs = 0; 2966 i915_gem_object_unpin_map(rq->batch->obj); 2967 } else { 2968 err = PTR_ERR(cs); 2969 intel_gt_set_wedged(gt); 2970 } 2971 2972 while (rq) { /* wait for each rq from highest to lowest prio */ 2973 struct i915_request *n = 2974 list_next_entry(rq, client_link); 2975 2976 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 2977 struct drm_printer p = 2978 drm_info_printer(engine->i915->drm.dev); 2979 2980 pr_err("Failed to flush chain of %d requests, at %d\n", 2981 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); 2982 intel_engine_dump(engine, &p, 2983 "%s\n", engine->name); 2984 2985 err = -ETIME; 2986 } 2987 2988 i915_vma_put(rq->batch); 2989 i915_request_put(rq); 2990 rq = n; 2991 } 2992 2993 if (igt_live_test_end(&t)) 2994 err = -EIO; 2995 if (err) 2996 return err; 2997 } 2998 2999 return 0; 3000 } 3001 3002 static struct i915_vma * 3003 create_gpr_user(struct intel_engine_cs *engine, 3004 struct i915_vma *result, 3005 unsigned int offset) 3006 { 3007 struct drm_i915_gem_object *obj; 3008 struct i915_vma *vma; 3009 u32 *cs; 3010 int err; 3011 int i; 3012 3013 obj = i915_gem_object_create_internal(engine->i915, 4096); 3014 if (IS_ERR(obj)) 3015 return ERR_CAST(obj); 3016 3017 vma = i915_vma_instance(obj, result->vm, NULL); 3018 if (IS_ERR(vma)) { 3019 i915_gem_object_put(obj); 3020 return vma; 3021 } 3022 3023 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3024 if (err) { 3025 i915_vma_put(vma); 3026 return ERR_PTR(err); 3027 } 3028 3029 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 3030 if (IS_ERR(cs)) { 3031 i915_vma_put(vma); 3032 return ERR_CAST(cs); 3033 } 3034 3035 /* All GPR are clear for new contexts. We use GPR(0) as a constant */ 3036 *cs++ = MI_LOAD_REGISTER_IMM(1); 3037 *cs++ = CS_GPR(engine, 0); 3038 *cs++ = 1; 3039 3040 for (i = 1; i < NUM_GPR; i++) { 3041 u64 addr; 3042 3043 /* 3044 * Perform: GPR[i]++ 3045 * 3046 * As we read and write into the context saved GPR[i], if 3047 * we restart this batch buffer from an earlier point, we 3048 * will repeat the increment and store a value > 1. 
3049 */ 3050 *cs++ = MI_MATH(4); 3051 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i)); 3052 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0)); 3053 *cs++ = MI_MATH_ADD; 3054 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); 3055 3056 addr = result->node.start + offset + i * sizeof(*cs); 3057 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 3058 *cs++ = CS_GPR(engine, 2 * i); 3059 *cs++ = lower_32_bits(addr); 3060 *cs++ = upper_32_bits(addr); 3061 3062 *cs++ = MI_SEMAPHORE_WAIT | 3063 MI_SEMAPHORE_POLL | 3064 MI_SEMAPHORE_SAD_GTE_SDD; 3065 *cs++ = i; 3066 *cs++ = lower_32_bits(result->node.start); 3067 *cs++ = upper_32_bits(result->node.start); 3068 } 3069 3070 *cs++ = MI_BATCH_BUFFER_END; 3071 i915_gem_object_flush_map(obj); 3072 i915_gem_object_unpin_map(obj); 3073 3074 return vma; 3075 } 3076 3077 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) 3078 { 3079 struct drm_i915_gem_object *obj; 3080 struct i915_vma *vma; 3081 int err; 3082 3083 obj = i915_gem_object_create_internal(gt->i915, sz); 3084 if (IS_ERR(obj)) 3085 return ERR_CAST(obj); 3086 3087 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 3088 if (IS_ERR(vma)) { 3089 i915_gem_object_put(obj); 3090 return vma; 3091 } 3092 3093 err = i915_ggtt_pin(vma, 0, 0); 3094 if (err) { 3095 i915_vma_put(vma); 3096 return ERR_PTR(err); 3097 } 3098 3099 return vma; 3100 } 3101 3102 static struct i915_request * 3103 create_gpr_client(struct intel_engine_cs *engine, 3104 struct i915_vma *global, 3105 unsigned int offset) 3106 { 3107 struct i915_vma *batch, *vma; 3108 struct intel_context *ce; 3109 struct i915_request *rq; 3110 int err; 3111 3112 ce = intel_context_create(engine); 3113 if (IS_ERR(ce)) 3114 return ERR_CAST(ce); 3115 3116 vma = i915_vma_instance(global->obj, ce->vm, NULL); 3117 if (IS_ERR(vma)) { 3118 err = PTR_ERR(vma); 3119 goto out_ce; 3120 } 3121 3122 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3123 if (err) 3124 goto out_ce; 3125 3126 batch = create_gpr_user(engine, vma, offset); 3127 if (IS_ERR(batch)) { 3128 err = PTR_ERR(batch); 3129 goto out_vma; 3130 } 3131 3132 rq = intel_context_create_request(ce); 3133 if (IS_ERR(rq)) { 3134 err = PTR_ERR(rq); 3135 goto out_batch; 3136 } 3137 3138 i915_vma_lock(vma); 3139 err = i915_request_await_object(rq, vma->obj, false); 3140 if (!err) 3141 err = i915_vma_move_to_active(vma, rq, 0); 3142 i915_vma_unlock(vma); 3143 3144 i915_vma_lock(batch); 3145 if (!err) 3146 err = i915_request_await_object(rq, batch->obj, false); 3147 if (!err) 3148 err = i915_vma_move_to_active(batch, rq, 0); 3149 if (!err) 3150 err = rq->engine->emit_bb_start(rq, 3151 batch->node.start, 3152 PAGE_SIZE, 0); 3153 i915_vma_unlock(batch); 3154 i915_vma_unpin(batch); 3155 3156 if (!err) 3157 i915_request_get(rq); 3158 i915_request_add(rq); 3159 3160 out_batch: 3161 i915_vma_put(batch); 3162 out_vma: 3163 i915_vma_unpin(vma); 3164 out_ce: 3165 intel_context_put(ce); 3166 return err ? 
ERR_PTR(err) : rq; 3167 } 3168 3169 static int preempt_user(struct intel_engine_cs *engine, 3170 struct i915_vma *global, 3171 int id) 3172 { 3173 struct i915_sched_attr attr = { 3174 .priority = I915_PRIORITY_MAX 3175 }; 3176 struct i915_request *rq; 3177 int err = 0; 3178 u32 *cs; 3179 3180 rq = intel_engine_create_kernel_request(engine); 3181 if (IS_ERR(rq)) 3182 return PTR_ERR(rq); 3183 3184 cs = intel_ring_begin(rq, 4); 3185 if (IS_ERR(cs)) { 3186 i915_request_add(rq); 3187 return PTR_ERR(cs); 3188 } 3189 3190 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 3191 *cs++ = i915_ggtt_offset(global); 3192 *cs++ = 0; 3193 *cs++ = id; 3194 3195 intel_ring_advance(rq, cs); 3196 3197 i915_request_get(rq); 3198 i915_request_add(rq); 3199 3200 engine->schedule(rq, &attr); 3201 3202 if (i915_request_wait(rq, 0, HZ / 2) < 0) 3203 err = -ETIME; 3204 i915_request_put(rq); 3205 3206 return err; 3207 } 3208 3209 static int live_preempt_user(void *arg) 3210 { 3211 struct intel_gt *gt = arg; 3212 struct intel_engine_cs *engine; 3213 struct i915_vma *global; 3214 enum intel_engine_id id; 3215 u32 *result; 3216 int err = 0; 3217 3218 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 3219 return 0; 3220 3221 /* 3222 * In our other tests, we look at preemption in carefully 3223 * controlled conditions in the ringbuffer. Since most of the 3224 * time is spent in user batches, most of our preemptions naturally 3225 * occur there. We want to verify that when we preempt inside a batch 3226 * we continue on from the current instruction and do not roll back 3227 * to the start, or another earlier arbitration point. 3228 * 3229 * To verify this, we create a batch which is a mixture of 3230 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with 3231 * a few preempting contexts thrown into the mix, we look for any 3232 * repeated instructions (which show up as incorrect values). 
3233 */ 3234 3235 global = create_global(gt, 4096); 3236 if (IS_ERR(global)) 3237 return PTR_ERR(global); 3238 3239 result = i915_gem_object_pin_map(global->obj, I915_MAP_WC); 3240 if (IS_ERR(result)) { 3241 i915_vma_unpin_and_release(&global, 0); 3242 return PTR_ERR(result); 3243 } 3244 3245 for_each_engine(engine, gt, id) { 3246 struct i915_request *client[3] = {}; 3247 struct igt_live_test t; 3248 int i; 3249 3250 if (!intel_engine_has_preemption(engine)) 3251 continue; 3252 3253 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) 3254 continue; /* we need per-context GPR */ 3255 3256 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 3257 err = -EIO; 3258 break; 3259 } 3260 3261 memset(result, 0, 4096); 3262 3263 for (i = 0; i < ARRAY_SIZE(client); i++) { 3264 struct i915_request *rq; 3265 3266 rq = create_gpr_client(engine, global, 3267 NUM_GPR * i * sizeof(u32)); 3268 if (IS_ERR(rq)) 3269 goto end_test; 3270 3271 client[i] = rq; 3272 } 3273 3274 /* Continuously preempt the set of 3 running contexts */ 3275 for (i = 1; i <= NUM_GPR; i++) { 3276 err = preempt_user(engine, global, i); 3277 if (err) 3278 goto end_test; 3279 } 3280 3281 if (READ_ONCE(result[0]) != NUM_GPR) { 3282 pr_err("%s: Failed to release semaphore\n", 3283 engine->name); 3284 err = -EIO; 3285 goto end_test; 3286 } 3287 3288 for (i = 0; i < ARRAY_SIZE(client); i++) { 3289 int gpr; 3290 3291 if (i915_request_wait(client[i], 0, HZ / 2) < 0) { 3292 err = -ETIME; 3293 goto end_test; 3294 } 3295 3296 for (gpr = 1; gpr < NUM_GPR; gpr++) { 3297 if (result[NUM_GPR * i + gpr] != 1) { 3298 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", 3299 engine->name, 3300 i, gpr, result[NUM_GPR * i + gpr]); 3301 err = -EINVAL; 3302 goto end_test; 3303 } 3304 } 3305 } 3306 3307 end_test: 3308 for (i = 0; i < ARRAY_SIZE(client); i++) { 3309 if (!client[i]) 3310 break; 3311 3312 i915_request_put(client[i]); 3313 } 3314 3315 /* Flush the semaphores on error */ 3316 smp_store_mb(result[0], -1); 3317 if (igt_live_test_end(&t)) 3318 err = -EIO; 3319 if (err) 3320 break; 3321 } 3322 3323 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP); 3324 return err; 3325 } 3326 3327 static int live_preempt_timeout(void *arg) 3328 { 3329 struct intel_gt *gt = arg; 3330 struct i915_gem_context *ctx_hi, *ctx_lo; 3331 struct igt_spinner spin_lo; 3332 struct intel_engine_cs *engine; 3333 enum intel_engine_id id; 3334 int err = -ENOMEM; 3335 3336 /* 3337 * Check that we force preemption to occur by cancelling the previous 3338 * context if it refuses to yield the GPU. 
3339 */ 3340 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 3341 return 0; 3342 3343 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 3344 return 0; 3345 3346 if (!intel_has_reset_engine(gt)) 3347 return 0; 3348 3349 if (igt_spinner_init(&spin_lo, gt)) 3350 return -ENOMEM; 3351 3352 ctx_hi = kernel_context(gt->i915); 3353 if (!ctx_hi) 3354 goto err_spin_lo; 3355 ctx_hi->sched.priority = 3356 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 3357 3358 ctx_lo = kernel_context(gt->i915); 3359 if (!ctx_lo) 3360 goto err_ctx_hi; 3361 ctx_lo->sched.priority = 3362 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 3363 3364 for_each_engine(engine, gt, id) { 3365 unsigned long saved_timeout; 3366 struct i915_request *rq; 3367 3368 if (!intel_engine_has_preemption(engine)) 3369 continue; 3370 3371 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 3372 MI_NOOP); /* preemption disabled */ 3373 if (IS_ERR(rq)) { 3374 err = PTR_ERR(rq); 3375 goto err_ctx_lo; 3376 } 3377 3378 i915_request_add(rq); 3379 if (!igt_wait_for_spinner(&spin_lo, rq)) { 3380 intel_gt_set_wedged(gt); 3381 err = -EIO; 3382 goto err_ctx_lo; 3383 } 3384 3385 rq = igt_request_alloc(ctx_hi, engine); 3386 if (IS_ERR(rq)) { 3387 igt_spinner_end(&spin_lo); 3388 err = PTR_ERR(rq); 3389 goto err_ctx_lo; 3390 } 3391 3392 /* Flush the previous CS ack before changing timeouts */ 3393 while (READ_ONCE(engine->execlists.pending[0])) 3394 cpu_relax(); 3395 3396 saved_timeout = engine->props.preempt_timeout_ms; 3397 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 3398 3399 i915_request_get(rq); 3400 i915_request_add(rq); 3401 3402 intel_engine_flush_submission(engine); 3403 engine->props.preempt_timeout_ms = saved_timeout; 3404 3405 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 3406 intel_gt_set_wedged(gt); 3407 i915_request_put(rq); 3408 err = -ETIME; 3409 goto err_ctx_lo; 3410 } 3411 3412 igt_spinner_end(&spin_lo); 3413 i915_request_put(rq); 3414 } 3415 3416 err = 0; 3417 err_ctx_lo: 3418 kernel_context_close(ctx_lo); 3419 err_ctx_hi: 3420 kernel_context_close(ctx_hi); 3421 err_spin_lo: 3422 igt_spinner_fini(&spin_lo); 3423 return err; 3424 } 3425 3426 static int random_range(struct rnd_state *rnd, int min, int max) 3427 { 3428 return i915_prandom_u32_max_state(max - min, rnd) + min; 3429 } 3430 3431 static int random_priority(struct rnd_state *rnd) 3432 { 3433 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 3434 } 3435 3436 struct preempt_smoke { 3437 struct intel_gt *gt; 3438 struct i915_gem_context **contexts; 3439 struct intel_engine_cs *engine; 3440 struct drm_i915_gem_object *batch; 3441 unsigned int ncontext; 3442 struct rnd_state prng; 3443 unsigned long count; 3444 }; 3445 3446 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 3447 { 3448 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 3449 &smoke->prng)]; 3450 } 3451 3452 static int smoke_submit(struct preempt_smoke *smoke, 3453 struct i915_gem_context *ctx, int prio, 3454 struct drm_i915_gem_object *batch) 3455 { 3456 struct i915_request *rq; 3457 struct i915_vma *vma = NULL; 3458 int err = 0; 3459 3460 if (batch) { 3461 struct i915_address_space *vm; 3462 3463 vm = i915_gem_context_get_vm_rcu(ctx); 3464 vma = i915_vma_instance(batch, vm, NULL); 3465 i915_vm_put(vm); 3466 if (IS_ERR(vma)) 3467 return PTR_ERR(vma); 3468 3469 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3470 if (err) 3471 return err; 3472 } 3473 3474 ctx->sched.priority = prio; 3475 3476 rq = igt_request_alloc(ctx, smoke->engine); 3477 if 
(IS_ERR(rq)) { 3478 err = PTR_ERR(rq); 3479 goto unpin; 3480 } 3481 3482 if (vma) { 3483 i915_vma_lock(vma); 3484 err = i915_request_await_object(rq, vma->obj, false); 3485 if (!err) 3486 err = i915_vma_move_to_active(vma, rq, 0); 3487 if (!err) 3488 err = rq->engine->emit_bb_start(rq, 3489 vma->node.start, 3490 PAGE_SIZE, 0); 3491 i915_vma_unlock(vma); 3492 } 3493 3494 i915_request_add(rq); 3495 3496 unpin: 3497 if (vma) 3498 i915_vma_unpin(vma); 3499 3500 return err; 3501 } 3502 3503 static int smoke_crescendo_thread(void *arg) 3504 { 3505 struct preempt_smoke *smoke = arg; 3506 IGT_TIMEOUT(end_time); 3507 unsigned long count; 3508 3509 count = 0; 3510 do { 3511 struct i915_gem_context *ctx = smoke_context(smoke); 3512 int err; 3513 3514 err = smoke_submit(smoke, 3515 ctx, count % I915_PRIORITY_MAX, 3516 smoke->batch); 3517 if (err) 3518 return err; 3519 3520 count++; 3521 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3522 3523 smoke->count = count; 3524 return 0; 3525 } 3526 3527 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 3528 #define BATCH BIT(0) 3529 { 3530 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 3531 struct preempt_smoke arg[I915_NUM_ENGINES]; 3532 struct intel_engine_cs *engine; 3533 enum intel_engine_id id; 3534 unsigned long count; 3535 int err = 0; 3536 3537 for_each_engine(engine, smoke->gt, id) { 3538 arg[id] = *smoke; 3539 arg[id].engine = engine; 3540 if (!(flags & BATCH)) 3541 arg[id].batch = NULL; 3542 arg[id].count = 0; 3543 3544 tsk[id] = kthread_run(smoke_crescendo_thread, &arg, 3545 "igt/smoke:%d", id); 3546 if (IS_ERR(tsk[id])) { 3547 err = PTR_ERR(tsk[id]); 3548 break; 3549 } 3550 get_task_struct(tsk[id]); 3551 } 3552 3553 yield(); /* start all threads before we kthread_stop() */ 3554 3555 count = 0; 3556 for_each_engine(engine, smoke->gt, id) { 3557 int status; 3558 3559 if (IS_ERR_OR_NULL(tsk[id])) 3560 continue; 3561 3562 status = kthread_stop(tsk[id]); 3563 if (status && !err) 3564 err = status; 3565 3566 count += arg[id].count; 3567 3568 put_task_struct(tsk[id]); 3569 } 3570 3571 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 3572 count, flags, 3573 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 3574 return 0; 3575 } 3576 3577 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 3578 { 3579 enum intel_engine_id id; 3580 IGT_TIMEOUT(end_time); 3581 unsigned long count; 3582 3583 count = 0; 3584 do { 3585 for_each_engine(smoke->engine, smoke->gt, id) { 3586 struct i915_gem_context *ctx = smoke_context(smoke); 3587 int err; 3588 3589 err = smoke_submit(smoke, 3590 ctx, random_priority(&smoke->prng), 3591 flags & BATCH ? 
smoke->batch : NULL); 3592 if (err) 3593 return err; 3594 3595 count++; 3596 } 3597 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3598 3599 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 3600 count, flags, 3601 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 3602 return 0; 3603 } 3604 3605 static int live_preempt_smoke(void *arg) 3606 { 3607 struct preempt_smoke smoke = { 3608 .gt = arg, 3609 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 3610 .ncontext = 256, 3611 }; 3612 const unsigned int phase[] = { 0, BATCH }; 3613 struct igt_live_test t; 3614 int err = -ENOMEM; 3615 u32 *cs; 3616 int n; 3617 3618 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) 3619 return 0; 3620 3621 smoke.contexts = kmalloc_array(smoke.ncontext, 3622 sizeof(*smoke.contexts), 3623 GFP_KERNEL); 3624 if (!smoke.contexts) 3625 return -ENOMEM; 3626 3627 smoke.batch = 3628 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 3629 if (IS_ERR(smoke.batch)) { 3630 err = PTR_ERR(smoke.batch); 3631 goto err_free; 3632 } 3633 3634 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); 3635 if (IS_ERR(cs)) { 3636 err = PTR_ERR(cs); 3637 goto err_batch; 3638 } 3639 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 3640 cs[n] = MI_ARB_CHECK; 3641 cs[n] = MI_BATCH_BUFFER_END; 3642 i915_gem_object_flush_map(smoke.batch); 3643 i915_gem_object_unpin_map(smoke.batch); 3644 3645 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 3646 err = -EIO; 3647 goto err_batch; 3648 } 3649 3650 for (n = 0; n < smoke.ncontext; n++) { 3651 smoke.contexts[n] = kernel_context(smoke.gt->i915); 3652 if (!smoke.contexts[n]) 3653 goto err_ctx; 3654 } 3655 3656 for (n = 0; n < ARRAY_SIZE(phase); n++) { 3657 err = smoke_crescendo(&smoke, phase[n]); 3658 if (err) 3659 goto err_ctx; 3660 3661 err = smoke_random(&smoke, phase[n]); 3662 if (err) 3663 goto err_ctx; 3664 } 3665 3666 err_ctx: 3667 if (igt_live_test_end(&t)) 3668 err = -EIO; 3669 3670 for (n = 0; n < smoke.ncontext; n++) { 3671 if (!smoke.contexts[n]) 3672 break; 3673 kernel_context_close(smoke.contexts[n]); 3674 } 3675 3676 err_batch: 3677 i915_gem_object_put(smoke.batch); 3678 err_free: 3679 kfree(smoke.contexts); 3680 3681 return err; 3682 } 3683 3684 static int nop_virtual_engine(struct intel_gt *gt, 3685 struct intel_engine_cs **siblings, 3686 unsigned int nsibling, 3687 unsigned int nctx, 3688 unsigned int flags) 3689 #define CHAIN BIT(0) 3690 { 3691 IGT_TIMEOUT(end_time); 3692 struct i915_request *request[16] = {}; 3693 struct intel_context *ve[16]; 3694 unsigned long n, prime, nc; 3695 struct igt_live_test t; 3696 ktime_t times[2] = {}; 3697 int err; 3698 3699 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3700 3701 for (n = 0; n < nctx; n++) { 3702 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 3703 if (IS_ERR(ve[n])) { 3704 err = PTR_ERR(ve[n]); 3705 nctx = n; 3706 goto out; 3707 } 3708 3709 err = intel_context_pin(ve[n]); 3710 if (err) { 3711 intel_context_put(ve[n]); 3712 nctx = n; 3713 goto out; 3714 } 3715 } 3716 3717 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 3718 if (err) 3719 goto out; 3720 3721 for_each_prime_number_from(prime, 1, 8192) { 3722 times[1] = ktime_get_raw(); 3723 3724 if (flags & CHAIN) { 3725 for (nc = 0; nc < nctx; nc++) { 3726 for (n = 0; n < prime; n++) { 3727 struct i915_request *rq; 3728 3729 rq = i915_request_create(ve[nc]); 3730 if (IS_ERR(rq)) { 3731 err = PTR_ERR(rq); 3732 goto out; 3733 } 3734 3735 if 
(request[nc]) 3736 i915_request_put(request[nc]); 3737 request[nc] = i915_request_get(rq); 3738 i915_request_add(rq); 3739 } 3740 } 3741 } else { 3742 for (n = 0; n < prime; n++) { 3743 for (nc = 0; nc < nctx; nc++) { 3744 struct i915_request *rq; 3745 3746 rq = i915_request_create(ve[nc]); 3747 if (IS_ERR(rq)) { 3748 err = PTR_ERR(rq); 3749 goto out; 3750 } 3751 3752 if (request[nc]) 3753 i915_request_put(request[nc]); 3754 request[nc] = i915_request_get(rq); 3755 i915_request_add(rq); 3756 } 3757 } 3758 } 3759 3760 for (nc = 0; nc < nctx; nc++) { 3761 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 3762 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3763 __func__, ve[0]->engine->name, 3764 request[nc]->fence.context, 3765 request[nc]->fence.seqno); 3766 3767 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3768 __func__, ve[0]->engine->name, 3769 request[nc]->fence.context, 3770 request[nc]->fence.seqno); 3771 GEM_TRACE_DUMP(); 3772 intel_gt_set_wedged(gt); 3773 break; 3774 } 3775 } 3776 3777 times[1] = ktime_sub(ktime_get_raw(), times[1]); 3778 if (prime == 1) 3779 times[0] = times[1]; 3780 3781 for (nc = 0; nc < nctx; nc++) { 3782 i915_request_put(request[nc]); 3783 request[nc] = NULL; 3784 } 3785 3786 if (__igt_timeout(end_time, NULL)) 3787 break; 3788 } 3789 3790 err = igt_live_test_end(&t); 3791 if (err) 3792 goto out; 3793 3794 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 3795 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 3796 prime, div64_u64(ktime_to_ns(times[1]), prime)); 3797 3798 out: 3799 if (igt_flush_test(gt->i915)) 3800 err = -EIO; 3801 3802 for (nc = 0; nc < nctx; nc++) { 3803 i915_request_put(request[nc]); 3804 intel_context_unpin(ve[nc]); 3805 intel_context_put(ve[nc]); 3806 } 3807 return err; 3808 } 3809 3810 static unsigned int 3811 __select_siblings(struct intel_gt *gt, 3812 unsigned int class, 3813 struct intel_engine_cs **siblings, 3814 bool (*filter)(const struct intel_engine_cs *)) 3815 { 3816 unsigned int n = 0; 3817 unsigned int inst; 3818 3819 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3820 if (!gt->engine_class[class][inst]) 3821 continue; 3822 3823 if (filter && !filter(gt->engine_class[class][inst])) 3824 continue; 3825 3826 siblings[n++] = gt->engine_class[class][inst]; 3827 } 3828 3829 return n; 3830 } 3831 3832 static unsigned int 3833 select_siblings(struct intel_gt *gt, 3834 unsigned int class, 3835 struct intel_engine_cs **siblings) 3836 { 3837 return __select_siblings(gt, class, siblings, NULL); 3838 } 3839 3840 static int live_virtual_engine(void *arg) 3841 { 3842 struct intel_gt *gt = arg; 3843 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3844 struct intel_engine_cs *engine; 3845 enum intel_engine_id id; 3846 unsigned int class; 3847 int err; 3848 3849 if (intel_uc_uses_guc_submission(>->uc)) 3850 return 0; 3851 3852 for_each_engine(engine, gt, id) { 3853 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 3854 if (err) { 3855 pr_err("Failed to wrap engine %s: err=%d\n", 3856 engine->name, err); 3857 return err; 3858 } 3859 } 3860 3861 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3862 int nsibling, n; 3863 3864 nsibling = select_siblings(gt, class, siblings); 3865 if (nsibling < 2) 3866 continue; 3867 3868 for (n = 1; n <= nsibling + 1; n++) { 3869 err = nop_virtual_engine(gt, siblings, nsibling, 3870 n, 0); 3871 if (err) 3872 return err; 3873 } 3874 3875 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 3876 if (err) 3877 return err; 3878 } 3879 3880 return 0; 3881 } 3882 
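/*
 * Editor's note: an illustrative sketch, not part of the selftest suite.
 * It condenses the virtual-engine usage pattern exercised by the tests in
 * this file (build a virtual context over a set of siblings, submit a
 * request to it, wait for completion). The helper name and the
 * __maybe_unused annotation are additions for illustration only.
 */
static int __maybe_unused sketch_submit_virtual_nop(struct intel_engine_cs **siblings,
						    unsigned int nsibling)
{
	struct intel_context *ve;
	struct i915_request *rq;
	int err;

	/* The virtual context load-balances across all provided siblings */
	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve))
		return PTR_ERR(ve);

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	/* Requests are created against the virtual context as usual */
	rq = i915_request_create(ve);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/* On completion, rq->engine points at the sibling that executed it */
	if (i915_request_wait(rq, 0, HZ / 10) < 0)
		err = -ETIME;

	i915_request_put(rq);
out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
	return err;
}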
3883 static int mask_virtual_engine(struct intel_gt *gt, 3884 struct intel_engine_cs **siblings, 3885 unsigned int nsibling) 3886 { 3887 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 3888 struct intel_context *ve; 3889 struct igt_live_test t; 3890 unsigned int n; 3891 int err; 3892 3893 /* 3894 * Check that by setting the execution mask on a request, we can 3895 * restrict it to our desired engine within the virtual engine. 3896 */ 3897 3898 ve = intel_execlists_create_virtual(siblings, nsibling); 3899 if (IS_ERR(ve)) { 3900 err = PTR_ERR(ve); 3901 goto out_close; 3902 } 3903 3904 err = intel_context_pin(ve); 3905 if (err) 3906 goto out_put; 3907 3908 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3909 if (err) 3910 goto out_unpin; 3911 3912 for (n = 0; n < nsibling; n++) { 3913 request[n] = i915_request_create(ve); 3914 if (IS_ERR(request[n])) { 3915 err = PTR_ERR(request[n]); 3916 nsibling = n; 3917 goto out; 3918 } 3919 3920 /* Reverse order as it's more likely to be unnatural */ 3921 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 3922 3923 i915_request_get(request[n]); 3924 i915_request_add(request[n]); 3925 } 3926 3927 for (n = 0; n < nsibling; n++) { 3928 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 3929 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3930 __func__, ve->engine->name, 3931 request[n]->fence.context, 3932 request[n]->fence.seqno); 3933 3934 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3935 __func__, ve->engine->name, 3936 request[n]->fence.context, 3937 request[n]->fence.seqno); 3938 GEM_TRACE_DUMP(); 3939 intel_gt_set_wedged(gt); 3940 err = -EIO; 3941 goto out; 3942 } 3943 3944 if (request[n]->engine != siblings[nsibling - n - 1]) { 3945 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 3946 request[n]->engine->name, 3947 siblings[nsibling - n - 1]->name); 3948 err = -EINVAL; 3949 goto out; 3950 } 3951 } 3952 3953 err = igt_live_test_end(&t); 3954 out: 3955 if (igt_flush_test(gt->i915)) 3956 err = -EIO; 3957 3958 for (n = 0; n < nsibling; n++) 3959 i915_request_put(request[n]); 3960 3961 out_unpin: 3962 intel_context_unpin(ve); 3963 out_put: 3964 intel_context_put(ve); 3965 out_close: 3966 return err; 3967 } 3968 3969 static int live_virtual_mask(void *arg) 3970 { 3971 struct intel_gt *gt = arg; 3972 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3973 unsigned int class; 3974 int err; 3975 3976 if (intel_uc_uses_guc_submission(>->uc)) 3977 return 0; 3978 3979 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3980 unsigned int nsibling; 3981 3982 nsibling = select_siblings(gt, class, siblings); 3983 if (nsibling < 2) 3984 continue; 3985 3986 err = mask_virtual_engine(gt, siblings, nsibling); 3987 if (err) 3988 return err; 3989 } 3990 3991 return 0; 3992 } 3993 3994 static int slicein_virtual_engine(struct intel_gt *gt, 3995 struct intel_engine_cs **siblings, 3996 unsigned int nsibling) 3997 { 3998 const long timeout = slice_timeout(siblings[0]); 3999 struct intel_context *ce; 4000 struct i915_request *rq; 4001 struct igt_spinner spin; 4002 unsigned int n; 4003 int err = 0; 4004 4005 /* 4006 * Virtual requests must take part in timeslicing on the target engines. 
4007 */ 4008 4009 if (igt_spinner_init(&spin, gt)) 4010 return -ENOMEM; 4011 4012 for (n = 0; n < nsibling; n++) { 4013 ce = intel_context_create(siblings[n]); 4014 if (IS_ERR(ce)) { 4015 err = PTR_ERR(ce); 4016 goto out; 4017 } 4018 4019 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4020 intel_context_put(ce); 4021 if (IS_ERR(rq)) { 4022 err = PTR_ERR(rq); 4023 goto out; 4024 } 4025 4026 i915_request_add(rq); 4027 } 4028 4029 ce = intel_execlists_create_virtual(siblings, nsibling); 4030 if (IS_ERR(ce)) { 4031 err = PTR_ERR(ce); 4032 goto out; 4033 } 4034 4035 rq = intel_context_create_request(ce); 4036 intel_context_put(ce); 4037 if (IS_ERR(rq)) { 4038 err = PTR_ERR(rq); 4039 goto out; 4040 } 4041 4042 i915_request_get(rq); 4043 i915_request_add(rq); 4044 if (i915_request_wait(rq, 0, timeout) < 0) { 4045 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n", 4046 __func__, rq->engine->name); 4047 GEM_TRACE_DUMP(); 4048 intel_gt_set_wedged(gt); 4049 err = -EIO; 4050 } 4051 i915_request_put(rq); 4052 4053 out: 4054 igt_spinner_end(&spin); 4055 if (igt_flush_test(gt->i915)) 4056 err = -EIO; 4057 igt_spinner_fini(&spin); 4058 return err; 4059 } 4060 4061 static int sliceout_virtual_engine(struct intel_gt *gt, 4062 struct intel_engine_cs **siblings, 4063 unsigned int nsibling) 4064 { 4065 const long timeout = slice_timeout(siblings[0]); 4066 struct intel_context *ce; 4067 struct i915_request *rq; 4068 struct igt_spinner spin; 4069 unsigned int n; 4070 int err = 0; 4071 4072 /* 4073 * Virtual requests must allow others a fair timeslice. 4074 */ 4075 4076 if (igt_spinner_init(&spin, gt)) 4077 return -ENOMEM; 4078 4079 /* XXX We do not handle oversubscription and fairness with normal rq */ 4080 for (n = 0; n < nsibling; n++) { 4081 ce = intel_execlists_create_virtual(siblings, nsibling); 4082 if (IS_ERR(ce)) { 4083 err = PTR_ERR(ce); 4084 goto out; 4085 } 4086 4087 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4088 intel_context_put(ce); 4089 if (IS_ERR(rq)) { 4090 err = PTR_ERR(rq); 4091 goto out; 4092 } 4093 4094 i915_request_add(rq); 4095 } 4096 4097 for (n = 0; !err && n < nsibling; n++) { 4098 ce = intel_context_create(siblings[n]); 4099 if (IS_ERR(ce)) { 4100 err = PTR_ERR(ce); 4101 goto out; 4102 } 4103 4104 rq = intel_context_create_request(ce); 4105 intel_context_put(ce); 4106 if (IS_ERR(rq)) { 4107 err = PTR_ERR(rq); 4108 goto out; 4109 } 4110 4111 i915_request_get(rq); 4112 i915_request_add(rq); 4113 if (i915_request_wait(rq, 0, timeout) < 0) { 4114 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n", 4115 __func__, siblings[n]->name); 4116 GEM_TRACE_DUMP(); 4117 intel_gt_set_wedged(gt); 4118 err = -EIO; 4119 } 4120 i915_request_put(rq); 4121 } 4122 4123 out: 4124 igt_spinner_end(&spin); 4125 if (igt_flush_test(gt->i915)) 4126 err = -EIO; 4127 igt_spinner_fini(&spin); 4128 return err; 4129 } 4130 4131 static int live_virtual_slice(void *arg) 4132 { 4133 struct intel_gt *gt = arg; 4134 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4135 unsigned int class; 4136 int err; 4137 4138 if (intel_uc_uses_guc_submission(>->uc)) 4139 return 0; 4140 4141 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4142 unsigned int nsibling; 4143 4144 nsibling = __select_siblings(gt, class, siblings, 4145 intel_engine_has_timeslices); 4146 if (nsibling < 2) 4147 continue; 4148 4149 err = slicein_virtual_engine(gt, siblings, nsibling); 4150 if (err) 4151 return err; 4152 4153 err = sliceout_virtual_engine(gt, siblings, nsibling); 4154 if (err) 4155 
return err; 4156 } 4157 4158 return 0; 4159 } 4160 4161 static int preserved_virtual_engine(struct intel_gt *gt, 4162 struct intel_engine_cs **siblings, 4163 unsigned int nsibling) 4164 { 4165 struct i915_request *last = NULL; 4166 struct intel_context *ve; 4167 struct i915_vma *scratch; 4168 struct igt_live_test t; 4169 unsigned int n; 4170 int err = 0; 4171 u32 *cs; 4172 4173 scratch = create_scratch(siblings[0]->gt); 4174 if (IS_ERR(scratch)) 4175 return PTR_ERR(scratch); 4176 4177 err = i915_vma_sync(scratch); 4178 if (err) 4179 goto out_scratch; 4180 4181 ve = intel_execlists_create_virtual(siblings, nsibling); 4182 if (IS_ERR(ve)) { 4183 err = PTR_ERR(ve); 4184 goto out_scratch; 4185 } 4186 4187 err = intel_context_pin(ve); 4188 if (err) 4189 goto out_put; 4190 4191 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 4192 if (err) 4193 goto out_unpin; 4194 4195 for (n = 0; n < NUM_GPR_DW; n++) { 4196 struct intel_engine_cs *engine = siblings[n % nsibling]; 4197 struct i915_request *rq; 4198 4199 rq = i915_request_create(ve); 4200 if (IS_ERR(rq)) { 4201 err = PTR_ERR(rq); 4202 goto out_end; 4203 } 4204 4205 i915_request_put(last); 4206 last = i915_request_get(rq); 4207 4208 cs = intel_ring_begin(rq, 8); 4209 if (IS_ERR(cs)) { 4210 i915_request_add(rq); 4211 err = PTR_ERR(cs); 4212 goto out_end; 4213 } 4214 4215 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4216 *cs++ = CS_GPR(engine, n); 4217 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4218 *cs++ = 0; 4219 4220 *cs++ = MI_LOAD_REGISTER_IMM(1); 4221 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 4222 *cs++ = n + 1; 4223 4224 *cs++ = MI_NOOP; 4225 intel_ring_advance(rq, cs); 4226 4227 /* Restrict this request to run on a particular engine */ 4228 rq->execution_mask = engine->mask; 4229 i915_request_add(rq); 4230 } 4231 4232 if (i915_request_wait(last, 0, HZ / 5) < 0) { 4233 err = -ETIME; 4234 goto out_end; 4235 } 4236 4237 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4238 if (IS_ERR(cs)) { 4239 err = PTR_ERR(cs); 4240 goto out_end; 4241 } 4242 4243 for (n = 0; n < NUM_GPR_DW; n++) { 4244 if (cs[n] != n) { 4245 pr_err("Incorrect value[%d] found for GPR[%d]\n", 4246 cs[n], n); 4247 err = -EINVAL; 4248 break; 4249 } 4250 } 4251 4252 i915_gem_object_unpin_map(scratch->obj); 4253 4254 out_end: 4255 if (igt_live_test_end(&t)) 4256 err = -EIO; 4257 i915_request_put(last); 4258 out_unpin: 4259 intel_context_unpin(ve); 4260 out_put: 4261 intel_context_put(ve); 4262 out_scratch: 4263 i915_vma_unpin_and_release(&scratch, 0); 4264 return err; 4265 } 4266 4267 static int live_virtual_preserved(void *arg) 4268 { 4269 struct intel_gt *gt = arg; 4270 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4271 unsigned int class; 4272 4273 /* 4274 * Check that the context image retains non-privileged (user) registers 4275 * from one engine to the next. For this we check that the CS_GPR 4276 * are preserved. 4277 */ 4278 4279 if (intel_uc_uses_guc_submission(>->uc)) 4280 return 0; 4281 4282 /* As we use CS_GPR we cannot run before they existed on all engines. 
 */
	if (INTEL_GEN(gt->i915) < 9)
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, err;

		nsibling = select_siblings(gt, class, siblings);
		if (nsibling < 2)
			continue;

		err = preserved_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}

static int bond_virtual_engine(struct intel_gt *gt,
			       unsigned int class,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling,
			       unsigned int flags)
#define BOND_SCHEDULE BIT(0)
{
	struct intel_engine_cs *master;
	struct i915_request *rq[16];
	enum intel_engine_id id;
	struct igt_spinner spin;
	unsigned long n;
	int err;

	/*
	 * A set of bonded requests is intended to be run concurrently
	 * across a number of engines. We use one request per-engine
	 * and a magic fence to schedule each of the bonded requests
	 * at the same time. A consequence of our current scheduler is that
	 * we only move requests to the HW ready queue when the request
	 * becomes ready, that is when all of its prerequisite fences have
	 * been signaled. As one of those fences is the master submit fence,
	 * there is a delay on all secondary fences as the HW may be
	 * currently busy. Equally, as all the requests are independent,
	 * they may have other fences that delay individual request
	 * submission to HW. Ergo, we do not guarantee that all requests are
	 * immediately submitted to HW at the same time, just that if the
	 * rules are abided by, they are ready at the same time as the
	 * first is submitted. Userspace can embed semaphores in its batch
	 * to ensure parallel execution of its phases as it requires.
	 * Naturally, it has been suggested that the scheduler should
	 * take care of parallel execution, even across preemption events on
	 * different HW. (The proper answer is of course "lalalala".)
	 *
	 * With the submit-fence, we have identified three possible phases
	 * of synchronisation depending on the master fence: queued (not
	 * ready), executing, and signaled. The first two are quite simple
	 * and checked below. However, the signaled master fence handling is
	 * contentious. Currently we do not distinguish between a signaled
	 * fence and an expired fence, as once signaled it does not convey
	 * any information about the previous execution. It may even be freed
	 * and hence checking later it may not exist at all. Ergo we currently
	 * do not apply the bonding constraint for an already signaled fence,
	 * as our expectation is that it should not constrain the secondaries
	 * and is outside of the scope of the bonded request API (i.e. all
	 * userspace requests are meant to be running in parallel). As
	 * it imposes no constraint, and is effectively a no-op, we do not
	 * check below as normal execution flows are checked extensively above.
	 *
	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
4352 */ 4353 4354 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 4355 4356 if (igt_spinner_init(&spin, gt)) 4357 return -ENOMEM; 4358 4359 err = 0; 4360 rq[0] = ERR_PTR(-ENOMEM); 4361 for_each_engine(master, gt, id) { 4362 struct i915_sw_fence fence = {}; 4363 struct intel_context *ce; 4364 4365 if (master->class == class) 4366 continue; 4367 4368 ce = intel_context_create(master); 4369 if (IS_ERR(ce)) { 4370 err = PTR_ERR(ce); 4371 goto out; 4372 } 4373 4374 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 4375 4376 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); 4377 intel_context_put(ce); 4378 if (IS_ERR(rq[0])) { 4379 err = PTR_ERR(rq[0]); 4380 goto out; 4381 } 4382 i915_request_get(rq[0]); 4383 4384 if (flags & BOND_SCHEDULE) { 4385 onstack_fence_init(&fence); 4386 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 4387 &fence, 4388 GFP_KERNEL); 4389 } 4390 4391 i915_request_add(rq[0]); 4392 if (err < 0) 4393 goto out; 4394 4395 if (!(flags & BOND_SCHEDULE) && 4396 !igt_wait_for_spinner(&spin, rq[0])) { 4397 err = -EIO; 4398 goto out; 4399 } 4400 4401 for (n = 0; n < nsibling; n++) { 4402 struct intel_context *ve; 4403 4404 ve = intel_execlists_create_virtual(siblings, nsibling); 4405 if (IS_ERR(ve)) { 4406 err = PTR_ERR(ve); 4407 onstack_fence_fini(&fence); 4408 goto out; 4409 } 4410 4411 err = intel_virtual_engine_attach_bond(ve->engine, 4412 master, 4413 siblings[n]); 4414 if (err) { 4415 intel_context_put(ve); 4416 onstack_fence_fini(&fence); 4417 goto out; 4418 } 4419 4420 err = intel_context_pin(ve); 4421 intel_context_put(ve); 4422 if (err) { 4423 onstack_fence_fini(&fence); 4424 goto out; 4425 } 4426 4427 rq[n + 1] = i915_request_create(ve); 4428 intel_context_unpin(ve); 4429 if (IS_ERR(rq[n + 1])) { 4430 err = PTR_ERR(rq[n + 1]); 4431 onstack_fence_fini(&fence); 4432 goto out; 4433 } 4434 i915_request_get(rq[n + 1]); 4435 4436 err = i915_request_await_execution(rq[n + 1], 4437 &rq[0]->fence, 4438 ve->engine->bond_execute); 4439 i915_request_add(rq[n + 1]); 4440 if (err < 0) { 4441 onstack_fence_fini(&fence); 4442 goto out; 4443 } 4444 } 4445 onstack_fence_fini(&fence); 4446 intel_engine_flush_submission(master); 4447 igt_spinner_end(&spin); 4448 4449 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 4450 pr_err("Master request did not execute (on %s)!\n", 4451 rq[0]->engine->name); 4452 err = -EIO; 4453 goto out; 4454 } 4455 4456 for (n = 0; n < nsibling; n++) { 4457 if (i915_request_wait(rq[n + 1], 0, 4458 MAX_SCHEDULE_TIMEOUT) < 0) { 4459 err = -EIO; 4460 goto out; 4461 } 4462 4463 if (rq[n + 1]->engine != siblings[n]) { 4464 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 4465 siblings[n]->name, 4466 rq[n + 1]->engine->name, 4467 rq[0]->engine->name); 4468 err = -EINVAL; 4469 goto out; 4470 } 4471 } 4472 4473 for (n = 0; !IS_ERR(rq[n]); n++) 4474 i915_request_put(rq[n]); 4475 rq[0] = ERR_PTR(-ENOMEM); 4476 } 4477 4478 out: 4479 for (n = 0; !IS_ERR(rq[n]); n++) 4480 i915_request_put(rq[n]); 4481 if (igt_flush_test(gt->i915)) 4482 err = -EIO; 4483 4484 igt_spinner_fini(&spin); 4485 return err; 4486 } 4487 4488 static int live_virtual_bond(void *arg) 4489 { 4490 static const struct phase { 4491 const char *name; 4492 unsigned int flags; 4493 } phases[] = { 4494 { "", 0 }, 4495 { "schedule", BOND_SCHEDULE }, 4496 { }, 4497 }; 4498 struct intel_gt *gt = arg; 4499 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4500 unsigned int class; 4501 int err; 4502 4503 if (intel_uc_uses_guc_submission(>->uc)) 4504 
return 0; 4505 4506 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4507 const struct phase *p; 4508 int nsibling; 4509 4510 nsibling = select_siblings(gt, class, siblings); 4511 if (nsibling < 2) 4512 continue; 4513 4514 for (p = phases; p->name; p++) { 4515 err = bond_virtual_engine(gt, 4516 class, siblings, nsibling, 4517 p->flags); 4518 if (err) { 4519 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 4520 __func__, p->name, class, nsibling, err); 4521 return err; 4522 } 4523 } 4524 } 4525 4526 return 0; 4527 } 4528 4529 static int reset_virtual_engine(struct intel_gt *gt, 4530 struct intel_engine_cs **siblings, 4531 unsigned int nsibling) 4532 { 4533 struct intel_engine_cs *engine; 4534 struct intel_context *ve; 4535 struct igt_spinner spin; 4536 struct i915_request *rq; 4537 unsigned int n; 4538 int err = 0; 4539 4540 /* 4541 * In order to support offline error capture for fast preempt reset, 4542 * we need to decouple the guilty request and ensure that it and its 4543 * descendents are not executed while the capture is in progress. 4544 */ 4545 4546 if (igt_spinner_init(&spin, gt)) 4547 return -ENOMEM; 4548 4549 ve = intel_execlists_create_virtual(siblings, nsibling); 4550 if (IS_ERR(ve)) { 4551 err = PTR_ERR(ve); 4552 goto out_spin; 4553 } 4554 4555 for (n = 0; n < nsibling; n++) 4556 st_engine_heartbeat_disable(siblings[n]); 4557 4558 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 4559 if (IS_ERR(rq)) { 4560 err = PTR_ERR(rq); 4561 goto out_heartbeat; 4562 } 4563 i915_request_add(rq); 4564 4565 if (!igt_wait_for_spinner(&spin, rq)) { 4566 intel_gt_set_wedged(gt); 4567 err = -ETIME; 4568 goto out_heartbeat; 4569 } 4570 4571 engine = rq->engine; 4572 GEM_BUG_ON(engine == ve->engine); 4573 4574 /* Take ownership of the reset and tasklet */ 4575 if (test_and_set_bit(I915_RESET_ENGINE + engine->id, 4576 >->reset.flags)) { 4577 intel_gt_set_wedged(gt); 4578 err = -EBUSY; 4579 goto out_heartbeat; 4580 } 4581 tasklet_disable(&engine->execlists.tasklet); 4582 4583 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 4584 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 4585 4586 /* Fake a preemption event; failed of course */ 4587 spin_lock_irq(&engine->active.lock); 4588 __unwind_incomplete_requests(engine); 4589 spin_unlock_irq(&engine->active.lock); 4590 GEM_BUG_ON(rq->engine != ve->engine); 4591 4592 /* Reset the engine while keeping our active request on hold */ 4593 execlists_hold(engine, rq); 4594 GEM_BUG_ON(!i915_request_on_hold(rq)); 4595 4596 intel_engine_reset(engine, NULL); 4597 GEM_BUG_ON(rq->fence.error != -EIO); 4598 4599 /* Release our grasp on the engine, letting CS flow again */ 4600 tasklet_enable(&engine->execlists.tasklet); 4601 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); 4602 4603 /* Check that we do not resubmit the held request */ 4604 i915_request_get(rq); 4605 if (!i915_request_wait(rq, 0, HZ / 5)) { 4606 pr_err("%s: on hold request completed!\n", 4607 engine->name); 4608 intel_gt_set_wedged(gt); 4609 err = -EIO; 4610 goto out_rq; 4611 } 4612 GEM_BUG_ON(!i915_request_on_hold(rq)); 4613 4614 /* But is resubmitted on release */ 4615 execlists_unhold(engine, rq); 4616 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4617 pr_err("%s: held request did not complete!\n", 4618 engine->name); 4619 intel_gt_set_wedged(gt); 4620 err = -ETIME; 4621 } 4622 4623 out_rq: 4624 i915_request_put(rq); 4625 out_heartbeat: 4626 for (n = 0; n < nsibling; n++) 4627 st_engine_heartbeat_enable(siblings[n]); 4628 4629 
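	/* Drop the reference taken by intel_execlists_create_virtual() */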
intel_context_put(ve); 4630 out_spin: 4631 igt_spinner_fini(&spin); 4632 return err; 4633 } 4634 4635 static int live_virtual_reset(void *arg) 4636 { 4637 struct intel_gt *gt = arg; 4638 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4639 unsigned int class; 4640 4641 /* 4642 * Check that we handle a reset event within a virtual engine. 4643 * Only the physical engine is reset, but we have to check the flow 4644 * of the virtual requests around the reset, and make sure it is not 4645 * forgotten. 4646 */ 4647 4648 if (intel_uc_uses_guc_submission(>->uc)) 4649 return 0; 4650 4651 if (!intel_has_reset_engine(gt)) 4652 return 0; 4653 4654 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4655 int nsibling, err; 4656 4657 nsibling = select_siblings(gt, class, siblings); 4658 if (nsibling < 2) 4659 continue; 4660 4661 err = reset_virtual_engine(gt, siblings, nsibling); 4662 if (err) 4663 return err; 4664 } 4665 4666 return 0; 4667 } 4668 4669 int intel_execlists_live_selftests(struct drm_i915_private *i915) 4670 { 4671 static const struct i915_subtest tests[] = { 4672 SUBTEST(live_sanitycheck), 4673 SUBTEST(live_unlite_switch), 4674 SUBTEST(live_unlite_preempt), 4675 SUBTEST(live_unlite_ring), 4676 SUBTEST(live_pin_rewind), 4677 SUBTEST(live_hold_reset), 4678 SUBTEST(live_error_interrupt), 4679 SUBTEST(live_timeslice_preempt), 4680 SUBTEST(live_timeslice_rewind), 4681 SUBTEST(live_timeslice_queue), 4682 SUBTEST(live_timeslice_nopreempt), 4683 SUBTEST(live_busywait_preempt), 4684 SUBTEST(live_preempt), 4685 SUBTEST(live_late_preempt), 4686 SUBTEST(live_nopreempt), 4687 SUBTEST(live_preempt_cancel), 4688 SUBTEST(live_suppress_self_preempt), 4689 SUBTEST(live_chain_preempt), 4690 SUBTEST(live_preempt_ring), 4691 SUBTEST(live_preempt_gang), 4692 SUBTEST(live_preempt_timeout), 4693 SUBTEST(live_preempt_user), 4694 SUBTEST(live_preempt_smoke), 4695 SUBTEST(live_virtual_engine), 4696 SUBTEST(live_virtual_mask), 4697 SUBTEST(live_virtual_preserved), 4698 SUBTEST(live_virtual_slice), 4699 SUBTEST(live_virtual_bond), 4700 SUBTEST(live_virtual_reset), 4701 }; 4702 4703 if (!HAS_EXECLISTS(i915)) 4704 return 0; 4705 4706 if (intel_gt_is_wedged(&i915->gt)) 4707 return 0; 4708 4709 return intel_gt_live_subtests(tests, &i915->gt); 4710 } 4711 4712 static int emit_semaphore_signal(struct intel_context *ce, void *slot) 4713 { 4714 const u32 offset = 4715 i915_ggtt_offset(ce->engine->status_page.vma) + 4716 offset_in_page(slot); 4717 struct i915_request *rq; 4718 u32 *cs; 4719 4720 rq = intel_context_create_request(ce); 4721 if (IS_ERR(rq)) 4722 return PTR_ERR(rq); 4723 4724 cs = intel_ring_begin(rq, 4); 4725 if (IS_ERR(cs)) { 4726 i915_request_add(rq); 4727 return PTR_ERR(cs); 4728 } 4729 4730 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 4731 *cs++ = offset; 4732 *cs++ = 0; 4733 *cs++ = 1; 4734 4735 intel_ring_advance(rq, cs); 4736 4737 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4738 i915_request_add(rq); 4739 return 0; 4740 } 4741 4742 static int context_flush(struct intel_context *ce, long timeout) 4743 { 4744 struct i915_request *rq; 4745 struct dma_fence *fence; 4746 int err = 0; 4747 4748 rq = intel_engine_create_kernel_request(ce->engine); 4749 if (IS_ERR(rq)) 4750 return PTR_ERR(rq); 4751 4752 fence = i915_active_fence_get(&ce->timeline->last_request); 4753 if (fence) { 4754 i915_request_await_dma_fence(rq, fence); 4755 dma_fence_put(fence); 4756 } 4757 4758 rq = i915_request_get(rq); 4759 i915_request_add(rq); 4760 if (i915_request_wait(rq, 0, timeout) < 0) 4761 err = -ETIME; 4762 
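	/* Whether or not the flush completed in time, we are done with our reference */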
i915_request_put(rq); 4763 4764 rmb(); /* We know the request is written, make sure all state is too! */ 4765 return err; 4766 } 4767 4768 static int live_lrc_layout(void *arg) 4769 { 4770 struct intel_gt *gt = arg; 4771 struct intel_engine_cs *engine; 4772 enum intel_engine_id id; 4773 u32 *lrc; 4774 int err; 4775 4776 /* 4777 * Check the registers offsets we use to create the initial reg state 4778 * match the layout saved by HW. 4779 */ 4780 4781 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); 4782 if (!lrc) 4783 return -ENOMEM; 4784 4785 err = 0; 4786 for_each_engine(engine, gt, id) { 4787 u32 *hw; 4788 int dw; 4789 4790 if (!engine->default_state) 4791 continue; 4792 4793 hw = shmem_pin_map(engine->default_state); 4794 if (IS_ERR(hw)) { 4795 err = PTR_ERR(hw); 4796 break; 4797 } 4798 hw += LRC_STATE_OFFSET / sizeof(*hw); 4799 4800 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), 4801 engine->kernel_context, 4802 engine, 4803 engine->kernel_context->ring, 4804 true); 4805 4806 dw = 0; 4807 do { 4808 u32 lri = hw[dw]; 4809 4810 if (lri == 0) { 4811 dw++; 4812 continue; 4813 } 4814 4815 if (lrc[dw] == 0) { 4816 pr_debug("%s: skipped instruction %x at dword %d\n", 4817 engine->name, lri, dw); 4818 dw++; 4819 continue; 4820 } 4821 4822 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 4823 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 4824 engine->name, dw, lri); 4825 err = -EINVAL; 4826 break; 4827 } 4828 4829 if (lrc[dw] != lri) { 4830 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 4831 engine->name, dw, lri, lrc[dw]); 4832 err = -EINVAL; 4833 break; 4834 } 4835 4836 lri &= 0x7f; 4837 lri++; 4838 dw++; 4839 4840 while (lri) { 4841 if (hw[dw] != lrc[dw]) { 4842 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 4843 engine->name, dw, hw[dw], lrc[dw]); 4844 err = -EINVAL; 4845 break; 4846 } 4847 4848 /* 4849 * Skip over the actual register value as we 4850 * expect that to differ. 4851 */ 4852 dw += 2; 4853 lri -= 2; 4854 } 4855 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 4856 4857 if (err) { 4858 pr_info("%s: HW register image:\n", engine->name); 4859 igt_hexdump(hw, PAGE_SIZE); 4860 4861 pr_info("%s: SW register image:\n", engine->name); 4862 igt_hexdump(lrc, PAGE_SIZE); 4863 } 4864 4865 shmem_unpin_map(engine->default_state, hw); 4866 if (err) 4867 break; 4868 } 4869 4870 kfree(lrc); 4871 return err; 4872 } 4873 4874 static int find_offset(const u32 *lri, u32 offset) 4875 { 4876 int i; 4877 4878 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 4879 if (lri[i] == offset) 4880 return i; 4881 4882 return -1; 4883 } 4884 4885 static int live_lrc_fixed(void *arg) 4886 { 4887 struct intel_gt *gt = arg; 4888 struct intel_engine_cs *engine; 4889 enum intel_engine_id id; 4890 int err = 0; 4891 4892 /* 4893 * Check the assumed register offsets match the actual locations in 4894 * the context image. 
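 *
 * Each entry in the table below pairs an mmio register with the dword
 * index at which its offset should be found in the default context
 * image; the CTX_* defines index the register's value, which follows
 * its offset, hence the "- 1".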
4895 */ 4896 4897 for_each_engine(engine, gt, id) { 4898 const struct { 4899 u32 reg; 4900 u32 offset; 4901 const char *name; 4902 } tbl[] = { 4903 { 4904 i915_mmio_reg_offset(RING_START(engine->mmio_base)), 4905 CTX_RING_START - 1, 4906 "RING_START" 4907 }, 4908 { 4909 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), 4910 CTX_RING_CTL - 1, 4911 "RING_CTL" 4912 }, 4913 { 4914 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), 4915 CTX_RING_HEAD - 1, 4916 "RING_HEAD" 4917 }, 4918 { 4919 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), 4920 CTX_RING_TAIL - 1, 4921 "RING_TAIL" 4922 }, 4923 { 4924 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), 4925 lrc_ring_mi_mode(engine), 4926 "RING_MI_MODE" 4927 }, 4928 { 4929 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), 4930 CTX_BB_STATE - 1, 4931 "BB_STATE" 4932 }, 4933 { 4934 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)), 4935 lrc_ring_wa_bb_per_ctx(engine), 4936 "RING_BB_PER_CTX_PTR" 4937 }, 4938 { 4939 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)), 4940 lrc_ring_indirect_ptr(engine), 4941 "RING_INDIRECT_CTX_PTR" 4942 }, 4943 { 4944 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)), 4945 lrc_ring_indirect_offset(engine), 4946 "RING_INDIRECT_CTX_OFFSET" 4947 }, 4948 { 4949 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), 4950 CTX_TIMESTAMP - 1, 4951 "RING_CTX_TIMESTAMP" 4952 }, 4953 { 4954 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)), 4955 lrc_ring_gpr0(engine), 4956 "RING_CS_GPR0" 4957 }, 4958 { 4959 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)), 4960 lrc_ring_cmd_buf_cctl(engine), 4961 "RING_CMD_BUF_CCTL" 4962 }, 4963 { }, 4964 }, *t; 4965 u32 *hw; 4966 4967 if (!engine->default_state) 4968 continue; 4969 4970 hw = shmem_pin_map(engine->default_state); 4971 if (IS_ERR(hw)) { 4972 err = PTR_ERR(hw); 4973 break; 4974 } 4975 hw += LRC_STATE_OFFSET / sizeof(*hw); 4976 4977 for (t = tbl; t->name; t++) { 4978 int dw = find_offset(hw, t->reg); 4979 4980 if (dw != t->offset) { 4981 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", 4982 engine->name, 4983 t->name, 4984 t->reg, 4985 dw, 4986 t->offset); 4987 err = -EINVAL; 4988 } 4989 } 4990 4991 shmem_unpin_map(engine->default_state, hw); 4992 } 4993 4994 return err; 4995 } 4996 4997 static int __live_lrc_state(struct intel_engine_cs *engine, 4998 struct i915_vma *scratch) 4999 { 5000 struct intel_context *ce; 5001 struct i915_request *rq; 5002 enum { 5003 RING_START_IDX = 0, 5004 RING_TAIL_IDX, 5005 MAX_IDX 5006 }; 5007 u32 expected[MAX_IDX]; 5008 u32 *cs; 5009 int err; 5010 int n; 5011 5012 ce = intel_context_create(engine); 5013 if (IS_ERR(ce)) 5014 return PTR_ERR(ce); 5015 5016 err = intel_context_pin(ce); 5017 if (err) 5018 goto err_put; 5019 5020 rq = i915_request_create(ce); 5021 if (IS_ERR(rq)) { 5022 err = PTR_ERR(rq); 5023 goto err_unpin; 5024 } 5025 5026 cs = intel_ring_begin(rq, 4 * MAX_IDX); 5027 if (IS_ERR(cs)) { 5028 err = PTR_ERR(cs); 5029 i915_request_add(rq); 5030 goto err_unpin; 5031 } 5032 5033 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 5034 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); 5035 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); 5036 *cs++ = 0; 5037 5038 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); 5039 5040 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 5041 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); 5042 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); 5043 *cs++ = 
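	/* address high dword: i915_ggtt_offset() returns a u32, so this is 0 */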
0; 5044 5045 i915_vma_lock(scratch); 5046 err = i915_request_await_object(rq, scratch->obj, true); 5047 if (!err) 5048 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 5049 i915_vma_unlock(scratch); 5050 5051 i915_request_get(rq); 5052 i915_request_add(rq); 5053 if (err) 5054 goto err_rq; 5055 5056 intel_engine_flush_submission(engine); 5057 expected[RING_TAIL_IDX] = ce->ring->tail; 5058 5059 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 5060 err = -ETIME; 5061 goto err_rq; 5062 } 5063 5064 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 5065 if (IS_ERR(cs)) { 5066 err = PTR_ERR(cs); 5067 goto err_rq; 5068 } 5069 5070 for (n = 0; n < MAX_IDX; n++) { 5071 if (cs[n] != expected[n]) { 5072 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", 5073 engine->name, n, cs[n], expected[n]); 5074 err = -EINVAL; 5075 break; 5076 } 5077 } 5078 5079 i915_gem_object_unpin_map(scratch->obj); 5080 5081 err_rq: 5082 i915_request_put(rq); 5083 err_unpin: 5084 intel_context_unpin(ce); 5085 err_put: 5086 intel_context_put(ce); 5087 return err; 5088 } 5089 5090 static int live_lrc_state(void *arg) 5091 { 5092 struct intel_gt *gt = arg; 5093 struct intel_engine_cs *engine; 5094 struct i915_vma *scratch; 5095 enum intel_engine_id id; 5096 int err = 0; 5097 5098 /* 5099 * Check the live register state matches what we expect for this 5100 * intel_context. 5101 */ 5102 5103 scratch = create_scratch(gt); 5104 if (IS_ERR(scratch)) 5105 return PTR_ERR(scratch); 5106 5107 for_each_engine(engine, gt, id) { 5108 err = __live_lrc_state(engine, scratch); 5109 if (err) 5110 break; 5111 } 5112 5113 if (igt_flush_test(gt->i915)) 5114 err = -EIO; 5115 5116 i915_vma_unpin_and_release(&scratch, 0); 5117 return err; 5118 } 5119 5120 static int gpr_make_dirty(struct intel_context *ce) 5121 { 5122 struct i915_request *rq; 5123 u32 *cs; 5124 int n; 5125 5126 rq = intel_context_create_request(ce); 5127 if (IS_ERR(rq)) 5128 return PTR_ERR(rq); 5129 5130 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); 5131 if (IS_ERR(cs)) { 5132 i915_request_add(rq); 5133 return PTR_ERR(cs); 5134 } 5135 5136 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); 5137 for (n = 0; n < NUM_GPR_DW; n++) { 5138 *cs++ = CS_GPR(ce->engine, n); 5139 *cs++ = STACK_MAGIC; 5140 } 5141 *cs++ = MI_NOOP; 5142 5143 intel_ring_advance(rq, cs); 5144 5145 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 5146 i915_request_add(rq); 5147 5148 return 0; 5149 } 5150 5151 static struct i915_request * 5152 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) 5153 { 5154 const u32 offset = 5155 i915_ggtt_offset(ce->engine->status_page.vma) + 5156 offset_in_page(slot); 5157 struct i915_request *rq; 5158 u32 *cs; 5159 int err; 5160 int n; 5161 5162 rq = intel_context_create_request(ce); 5163 if (IS_ERR(rq)) 5164 return rq; 5165 5166 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); 5167 if (IS_ERR(cs)) { 5168 i915_request_add(rq); 5169 return ERR_CAST(cs); 5170 } 5171 5172 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5173 *cs++ = MI_NOOP; 5174 5175 *cs++ = MI_SEMAPHORE_WAIT | 5176 MI_SEMAPHORE_GLOBAL_GTT | 5177 MI_SEMAPHORE_POLL | 5178 MI_SEMAPHORE_SAD_NEQ_SDD; 5179 *cs++ = 0; 5180 *cs++ = offset; 5181 *cs++ = 0; 5182 5183 for (n = 0; n < NUM_GPR_DW; n++) { 5184 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 5185 *cs++ = CS_GPR(ce->engine, n); 5186 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 5187 *cs++ = 0; 5188 } 5189 5190 i915_vma_lock(scratch); 5191 err = i915_request_await_object(rq, scratch->obj, true); 5192 if (!err) 
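		/* and mark the scratch as being written by the GPU in this request */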
5193 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 5194 i915_vma_unlock(scratch); 5195 5196 i915_request_get(rq); 5197 i915_request_add(rq); 5198 if (err) { 5199 i915_request_put(rq); 5200 rq = ERR_PTR(err); 5201 } 5202 5203 return rq; 5204 } 5205 5206 static int __live_lrc_gpr(struct intel_engine_cs *engine, 5207 struct i915_vma *scratch, 5208 bool preempt) 5209 { 5210 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); 5211 struct intel_context *ce; 5212 struct i915_request *rq; 5213 u32 *cs; 5214 int err; 5215 int n; 5216 5217 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) 5218 return 0; /* GPR only on rcs0 for gen8 */ 5219 5220 err = gpr_make_dirty(engine->kernel_context); 5221 if (err) 5222 return err; 5223 5224 ce = intel_context_create(engine); 5225 if (IS_ERR(ce)) 5226 return PTR_ERR(ce); 5227 5228 rq = __gpr_read(ce, scratch, slot); 5229 if (IS_ERR(rq)) { 5230 err = PTR_ERR(rq); 5231 goto err_put; 5232 } 5233 5234 err = wait_for_submit(engine, rq, HZ / 2); 5235 if (err) 5236 goto err_rq; 5237 5238 if (preempt) { 5239 err = gpr_make_dirty(engine->kernel_context); 5240 if (err) 5241 goto err_rq; 5242 5243 err = emit_semaphore_signal(engine->kernel_context, slot); 5244 if (err) 5245 goto err_rq; 5246 } else { 5247 slot[0] = 1; 5248 wmb(); 5249 } 5250 5251 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 5252 err = -ETIME; 5253 goto err_rq; 5254 } 5255 5256 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 5257 if (IS_ERR(cs)) { 5258 err = PTR_ERR(cs); 5259 goto err_rq; 5260 } 5261 5262 for (n = 0; n < NUM_GPR_DW; n++) { 5263 if (cs[n]) { 5264 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", 5265 engine->name, 5266 n / 2, n & 1 ? "udw" : "ldw", 5267 cs[n]); 5268 err = -EINVAL; 5269 break; 5270 } 5271 } 5272 5273 i915_gem_object_unpin_map(scratch->obj); 5274 5275 err_rq: 5276 memset32(&slot[0], -1, 4); 5277 wmb(); 5278 i915_request_put(rq); 5279 err_put: 5280 intel_context_put(ce); 5281 return err; 5282 } 5283 5284 static int live_lrc_gpr(void *arg) 5285 { 5286 struct intel_gt *gt = arg; 5287 struct intel_engine_cs *engine; 5288 struct i915_vma *scratch; 5289 enum intel_engine_id id; 5290 int err = 0; 5291 5292 /* 5293 * Check that GPR registers are cleared in new contexts as we need 5294 * to avoid leaking any information from previous contexts. 
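 *
 * The GPRs are first dirtied from the kernel context, then read back
 * from a brand new context, both with and without a preempting request
 * in between; any non-zero value read back is a failure.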
5295 */ 5296 5297 scratch = create_scratch(gt); 5298 if (IS_ERR(scratch)) 5299 return PTR_ERR(scratch); 5300 5301 for_each_engine(engine, gt, id) { 5302 st_engine_heartbeat_disable(engine); 5303 5304 err = __live_lrc_gpr(engine, scratch, false); 5305 if (err) 5306 goto err; 5307 5308 err = __live_lrc_gpr(engine, scratch, true); 5309 if (err) 5310 goto err; 5311 5312 err: 5313 st_engine_heartbeat_enable(engine); 5314 if (igt_flush_test(gt->i915)) 5315 err = -EIO; 5316 if (err) 5317 break; 5318 } 5319 5320 i915_vma_unpin_and_release(&scratch, 0); 5321 return err; 5322 } 5323 5324 static struct i915_request * 5325 create_timestamp(struct intel_context *ce, void *slot, int idx) 5326 { 5327 const u32 offset = 5328 i915_ggtt_offset(ce->engine->status_page.vma) + 5329 offset_in_page(slot); 5330 struct i915_request *rq; 5331 u32 *cs; 5332 int err; 5333 5334 rq = intel_context_create_request(ce); 5335 if (IS_ERR(rq)) 5336 return rq; 5337 5338 cs = intel_ring_begin(rq, 10); 5339 if (IS_ERR(cs)) { 5340 err = PTR_ERR(cs); 5341 goto err; 5342 } 5343 5344 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5345 *cs++ = MI_NOOP; 5346 5347 *cs++ = MI_SEMAPHORE_WAIT | 5348 MI_SEMAPHORE_GLOBAL_GTT | 5349 MI_SEMAPHORE_POLL | 5350 MI_SEMAPHORE_SAD_NEQ_SDD; 5351 *cs++ = 0; 5352 *cs++ = offset; 5353 *cs++ = 0; 5354 5355 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 5356 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base)); 5357 *cs++ = offset + idx * sizeof(u32); 5358 *cs++ = 0; 5359 5360 intel_ring_advance(rq, cs); 5361 5362 rq->sched.attr.priority = I915_PRIORITY_MASK; 5363 err = 0; 5364 err: 5365 i915_request_get(rq); 5366 i915_request_add(rq); 5367 if (err) { 5368 i915_request_put(rq); 5369 return ERR_PTR(err); 5370 } 5371 5372 return rq; 5373 } 5374 5375 struct lrc_timestamp { 5376 struct intel_engine_cs *engine; 5377 struct intel_context *ce[2]; 5378 u32 poison; 5379 }; 5380 5381 static bool timestamp_advanced(u32 start, u32 end) 5382 { 5383 return (s32)(end - start) > 0; 5384 } 5385 5386 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) 5387 { 5388 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4); 5389 struct i915_request *rq; 5390 u32 timestamp; 5391 int err = 0; 5392 5393 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison; 5394 rq = create_timestamp(arg->ce[0], slot, 1); 5395 if (IS_ERR(rq)) 5396 return PTR_ERR(rq); 5397 5398 err = wait_for_submit(rq->engine, rq, HZ / 2); 5399 if (err) 5400 goto err; 5401 5402 if (preempt) { 5403 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef; 5404 err = emit_semaphore_signal(arg->ce[1], slot); 5405 if (err) 5406 goto err; 5407 } else { 5408 slot[0] = 1; 5409 wmb(); 5410 } 5411 5412 /* And wait for switch to kernel (to save our context to memory) */ 5413 err = context_flush(arg->ce[0], HZ / 2); 5414 if (err) 5415 goto err; 5416 5417 if (!timestamp_advanced(arg->poison, slot[1])) { 5418 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", 5419 arg->engine->name, preempt ? "preempt" : "simple", 5420 arg->poison, slot[1]); 5421 err = -EINVAL; 5422 } 5423 5424 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]); 5425 if (!timestamp_advanced(slot[1], timestamp)) { 5426 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n", 5427 arg->engine->name, preempt ? 
"preempt" : "simple", 5428 slot[1], timestamp); 5429 err = -EINVAL; 5430 } 5431 5432 err: 5433 memset32(slot, -1, 4); 5434 i915_request_put(rq); 5435 return err; 5436 } 5437 5438 static int live_lrc_timestamp(void *arg) 5439 { 5440 struct lrc_timestamp data = {}; 5441 struct intel_gt *gt = arg; 5442 enum intel_engine_id id; 5443 const u32 poison[] = { 5444 0, 5445 S32_MAX, 5446 (u32)S32_MAX + 1, 5447 U32_MAX, 5448 }; 5449 5450 /* 5451 * We want to verify that the timestamp is saved and restore across 5452 * context switches and is monotonic. 5453 * 5454 * So we do this with a little bit of LRC poisoning to check various 5455 * boundary conditions, and see what happens if we preempt the context 5456 * with a second request (carrying more poison into the timestamp). 5457 */ 5458 5459 for_each_engine(data.engine, gt, id) { 5460 int i, err = 0; 5461 5462 st_engine_heartbeat_disable(data.engine); 5463 5464 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5465 struct intel_context *tmp; 5466 5467 tmp = intel_context_create(data.engine); 5468 if (IS_ERR(tmp)) { 5469 err = PTR_ERR(tmp); 5470 goto err; 5471 } 5472 5473 err = intel_context_pin(tmp); 5474 if (err) { 5475 intel_context_put(tmp); 5476 goto err; 5477 } 5478 5479 data.ce[i] = tmp; 5480 } 5481 5482 for (i = 0; i < ARRAY_SIZE(poison); i++) { 5483 data.poison = poison[i]; 5484 5485 err = __lrc_timestamp(&data, false); 5486 if (err) 5487 break; 5488 5489 err = __lrc_timestamp(&data, true); 5490 if (err) 5491 break; 5492 } 5493 5494 err: 5495 st_engine_heartbeat_enable(data.engine); 5496 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5497 if (!data.ce[i]) 5498 break; 5499 5500 intel_context_unpin(data.ce[i]); 5501 intel_context_put(data.ce[i]); 5502 } 5503 5504 if (igt_flush_test(gt->i915)) 5505 err = -EIO; 5506 if (err) 5507 return err; 5508 } 5509 5510 return 0; 5511 } 5512 5513 static struct i915_vma * 5514 create_user_vma(struct i915_address_space *vm, unsigned long size) 5515 { 5516 struct drm_i915_gem_object *obj; 5517 struct i915_vma *vma; 5518 int err; 5519 5520 obj = i915_gem_object_create_internal(vm->i915, size); 5521 if (IS_ERR(obj)) 5522 return ERR_CAST(obj); 5523 5524 vma = i915_vma_instance(obj, vm, NULL); 5525 if (IS_ERR(vma)) { 5526 i915_gem_object_put(obj); 5527 return vma; 5528 } 5529 5530 err = i915_vma_pin(vma, 0, 0, PIN_USER); 5531 if (err) { 5532 i915_gem_object_put(obj); 5533 return ERR_PTR(err); 5534 } 5535 5536 return vma; 5537 } 5538 5539 static struct i915_vma * 5540 store_context(struct intel_context *ce, struct i915_vma *scratch) 5541 { 5542 struct i915_vma *batch; 5543 u32 dw, x, *cs, *hw; 5544 u32 *defaults; 5545 5546 batch = create_user_vma(ce->vm, SZ_64K); 5547 if (IS_ERR(batch)) 5548 return batch; 5549 5550 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 5551 if (IS_ERR(cs)) { 5552 i915_vma_put(batch); 5553 return ERR_CAST(cs); 5554 } 5555 5556 defaults = shmem_pin_map(ce->engine->default_state); 5557 if (!defaults) { 5558 i915_gem_object_unpin_map(batch->obj); 5559 i915_vma_put(batch); 5560 return ERR_PTR(-ENOMEM); 5561 } 5562 5563 x = 0; 5564 dw = 0; 5565 hw = defaults; 5566 hw += LRC_STATE_OFFSET / sizeof(*hw); 5567 do { 5568 u32 len = hw[dw] & 0x7f; 5569 5570 if (hw[dw] == 0) { 5571 dw++; 5572 continue; 5573 } 5574 5575 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5576 dw += len + 2; 5577 continue; 5578 } 5579 5580 dw++; 5581 len = (len + 1) / 2; 5582 while (len--) { 5583 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 5584 *cs++ = hw[dw]; 5585 *cs++ = lower_32_bits(scratch->node.start + x); 5586 *cs++ = 
upper_32_bits(scratch->node.start + x); 5587 5588 dw += 2; 5589 x += 4; 5590 } 5591 } while (dw < PAGE_SIZE / sizeof(u32) && 5592 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5593 5594 *cs++ = MI_BATCH_BUFFER_END; 5595 5596 shmem_unpin_map(ce->engine->default_state, defaults); 5597 5598 i915_gem_object_flush_map(batch->obj); 5599 i915_gem_object_unpin_map(batch->obj); 5600 5601 return batch; 5602 } 5603 5604 static int move_to_active(struct i915_request *rq, 5605 struct i915_vma *vma, 5606 unsigned int flags) 5607 { 5608 int err; 5609 5610 i915_vma_lock(vma); 5611 err = i915_request_await_object(rq, vma->obj, flags); 5612 if (!err) 5613 err = i915_vma_move_to_active(vma, rq, flags); 5614 i915_vma_unlock(vma); 5615 5616 return err; 5617 } 5618 5619 static struct i915_request * 5620 record_registers(struct intel_context *ce, 5621 struct i915_vma *before, 5622 struct i915_vma *after, 5623 u32 *sema) 5624 { 5625 struct i915_vma *b_before, *b_after; 5626 struct i915_request *rq; 5627 u32 *cs; 5628 int err; 5629 5630 b_before = store_context(ce, before); 5631 if (IS_ERR(b_before)) 5632 return ERR_CAST(b_before); 5633 5634 b_after = store_context(ce, after); 5635 if (IS_ERR(b_after)) { 5636 rq = ERR_CAST(b_after); 5637 goto err_before; 5638 } 5639 5640 rq = intel_context_create_request(ce); 5641 if (IS_ERR(rq)) 5642 goto err_after; 5643 5644 err = move_to_active(rq, before, EXEC_OBJECT_WRITE); 5645 if (err) 5646 goto err_rq; 5647 5648 err = move_to_active(rq, b_before, 0); 5649 if (err) 5650 goto err_rq; 5651 5652 err = move_to_active(rq, after, EXEC_OBJECT_WRITE); 5653 if (err) 5654 goto err_rq; 5655 5656 err = move_to_active(rq, b_after, 0); 5657 if (err) 5658 goto err_rq; 5659 5660 cs = intel_ring_begin(rq, 14); 5661 if (IS_ERR(cs)) { 5662 err = PTR_ERR(cs); 5663 goto err_rq; 5664 } 5665 5666 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5667 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5668 *cs++ = lower_32_bits(b_before->node.start); 5669 *cs++ = upper_32_bits(b_before->node.start); 5670 5671 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5672 *cs++ = MI_SEMAPHORE_WAIT | 5673 MI_SEMAPHORE_GLOBAL_GTT | 5674 MI_SEMAPHORE_POLL | 5675 MI_SEMAPHORE_SAD_NEQ_SDD; 5676 *cs++ = 0; 5677 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 5678 offset_in_page(sema); 5679 *cs++ = 0; 5680 *cs++ = MI_NOOP; 5681 5682 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5683 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5684 *cs++ = lower_32_bits(b_after->node.start); 5685 *cs++ = upper_32_bits(b_after->node.start); 5686 5687 intel_ring_advance(rq, cs); 5688 5689 WRITE_ONCE(*sema, 0); 5690 i915_request_get(rq); 5691 i915_request_add(rq); 5692 err_after: 5693 i915_vma_put(b_after); 5694 err_before: 5695 i915_vma_put(b_before); 5696 return rq; 5697 5698 err_rq: 5699 i915_request_add(rq); 5700 rq = ERR_PTR(err); 5701 goto err_after; 5702 } 5703 5704 static struct i915_vma *load_context(struct intel_context *ce, u32 poison) 5705 { 5706 struct i915_vma *batch; 5707 u32 dw, *cs, *hw; 5708 u32 *defaults; 5709 5710 batch = create_user_vma(ce->vm, SZ_64K); 5711 if (IS_ERR(batch)) 5712 return batch; 5713 5714 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 5715 if (IS_ERR(cs)) { 5716 i915_vma_put(batch); 5717 return ERR_CAST(cs); 5718 } 5719 5720 defaults = shmem_pin_map(ce->engine->default_state); 5721 if (!defaults) { 5722 i915_gem_object_unpin_map(batch->obj); 5723 i915_vma_put(batch); 5724 return ERR_PTR(-ENOMEM); 5725 } 5726 5727 dw = 0; 5728 hw = defaults; 5729 hw += LRC_STATE_OFFSET / sizeof(*hw); 5730 do { 5731 u32 len = hw[dw] 
& 0x7f; 5732 5733 if (hw[dw] == 0) { 5734 dw++; 5735 continue; 5736 } 5737 5738 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5739 dw += len + 2; 5740 continue; 5741 } 5742 5743 dw++; 5744 len = (len + 1) / 2; 5745 *cs++ = MI_LOAD_REGISTER_IMM(len); 5746 while (len--) { 5747 *cs++ = hw[dw]; 5748 *cs++ = poison; 5749 dw += 2; 5750 } 5751 } while (dw < PAGE_SIZE / sizeof(u32) && 5752 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5753 5754 *cs++ = MI_BATCH_BUFFER_END; 5755 5756 shmem_unpin_map(ce->engine->default_state, defaults); 5757 5758 i915_gem_object_flush_map(batch->obj); 5759 i915_gem_object_unpin_map(batch->obj); 5760 5761 return batch; 5762 } 5763 5764 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) 5765 { 5766 struct i915_request *rq; 5767 struct i915_vma *batch; 5768 u32 *cs; 5769 int err; 5770 5771 batch = load_context(ce, poison); 5772 if (IS_ERR(batch)) 5773 return PTR_ERR(batch); 5774 5775 rq = intel_context_create_request(ce); 5776 if (IS_ERR(rq)) { 5777 err = PTR_ERR(rq); 5778 goto err_batch; 5779 } 5780 5781 err = move_to_active(rq, batch, 0); 5782 if (err) 5783 goto err_rq; 5784 5785 cs = intel_ring_begin(rq, 8); 5786 if (IS_ERR(cs)) { 5787 err = PTR_ERR(cs); 5788 goto err_rq; 5789 } 5790 5791 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5792 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5793 *cs++ = lower_32_bits(batch->node.start); 5794 *cs++ = upper_32_bits(batch->node.start); 5795 5796 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 5797 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 5798 offset_in_page(sema); 5799 *cs++ = 0; 5800 *cs++ = 1; 5801 5802 intel_ring_advance(rq, cs); 5803 5804 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 5805 err_rq: 5806 i915_request_add(rq); 5807 err_batch: 5808 i915_vma_put(batch); 5809 return err; 5810 } 5811 5812 static bool is_moving(u32 a, u32 b) 5813 { 5814 return a != b; 5815 } 5816 5817 static int compare_isolation(struct intel_engine_cs *engine, 5818 struct i915_vma *ref[2], 5819 struct i915_vma *result[2], 5820 struct intel_context *ce, 5821 u32 poison) 5822 { 5823 u32 x, dw, *hw, *lrc; 5824 u32 *A[2], *B[2]; 5825 u32 *defaults; 5826 int err = 0; 5827 5828 A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC); 5829 if (IS_ERR(A[0])) 5830 return PTR_ERR(A[0]); 5831 5832 A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC); 5833 if (IS_ERR(A[1])) { 5834 err = PTR_ERR(A[1]); 5835 goto err_A0; 5836 } 5837 5838 B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC); 5839 if (IS_ERR(B[0])) { 5840 err = PTR_ERR(B[0]); 5841 goto err_A1; 5842 } 5843 5844 B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC); 5845 if (IS_ERR(B[1])) { 5846 err = PTR_ERR(B[1]); 5847 goto err_B0; 5848 } 5849 5850 lrc = i915_gem_object_pin_map(ce->state->obj, 5851 i915_coherent_map_type(engine->i915)); 5852 if (IS_ERR(lrc)) { 5853 err = PTR_ERR(lrc); 5854 goto err_B1; 5855 } 5856 lrc += LRC_STATE_OFFSET / sizeof(*hw); 5857 5858 defaults = shmem_pin_map(ce->engine->default_state); 5859 if (!defaults) { 5860 err = -ENOMEM; 5861 goto err_lrc; 5862 } 5863 5864 x = 0; 5865 dw = 0; 5866 hw = defaults; 5867 hw += LRC_STATE_OFFSET / sizeof(*hw); 5868 do { 5869 u32 len = hw[dw] & 0x7f; 5870 5871 if (hw[dw] == 0) { 5872 dw++; 5873 continue; 5874 } 5875 5876 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5877 dw += len + 2; 5878 continue; 5879 } 5880 5881 dw++; 5882 len = (len + 1) / 2; 5883 while (len--) { 5884 if (!is_moving(A[0][x], A[1][x]) && 5885 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) { 5886 
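				/*
				 * The ring head/tail registers (0x30/0x34 in
				 * the engine's mmio block) are expected to
				 * differ between contexts; any other register
				 * changing indicates leakage from the other
				 * context.
				 */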
switch (hw[dw] & 4095) { 5887 case 0x30: /* RING_HEAD */ 5888 case 0x34: /* RING_TAIL */ 5889 break; 5890 5891 default: 5892 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n", 5893 engine->name, dw, 5894 hw[dw], hw[dw + 1], 5895 A[0][x], B[0][x], B[1][x], 5896 poison, lrc[dw + 1]); 5897 err = -EINVAL; 5898 } 5899 } 5900 dw += 2; 5901 x++; 5902 } 5903 } while (dw < PAGE_SIZE / sizeof(u32) && 5904 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5905 5906 shmem_unpin_map(ce->engine->default_state, defaults); 5907 err_lrc: 5908 i915_gem_object_unpin_map(ce->state->obj); 5909 err_B1: 5910 i915_gem_object_unpin_map(result[1]->obj); 5911 err_B0: 5912 i915_gem_object_unpin_map(result[0]->obj); 5913 err_A1: 5914 i915_gem_object_unpin_map(ref[1]->obj); 5915 err_A0: 5916 i915_gem_object_unpin_map(ref[0]->obj); 5917 return err; 5918 } 5919 5920 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) 5921 { 5922 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); 5923 struct i915_vma *ref[2], *result[2]; 5924 struct intel_context *A, *B; 5925 struct i915_request *rq; 5926 int err; 5927 5928 A = intel_context_create(engine); 5929 if (IS_ERR(A)) 5930 return PTR_ERR(A); 5931 5932 B = intel_context_create(engine); 5933 if (IS_ERR(B)) { 5934 err = PTR_ERR(B); 5935 goto err_A; 5936 } 5937 5938 ref[0] = create_user_vma(A->vm, SZ_64K); 5939 if (IS_ERR(ref[0])) { 5940 err = PTR_ERR(ref[0]); 5941 goto err_B; 5942 } 5943 5944 ref[1] = create_user_vma(A->vm, SZ_64K); 5945 if (IS_ERR(ref[1])) { 5946 err = PTR_ERR(ref[1]); 5947 goto err_ref0; 5948 } 5949 5950 rq = record_registers(A, ref[0], ref[1], sema); 5951 if (IS_ERR(rq)) { 5952 err = PTR_ERR(rq); 5953 goto err_ref1; 5954 } 5955 5956 WRITE_ONCE(*sema, 1); 5957 wmb(); 5958 5959 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 5960 i915_request_put(rq); 5961 err = -ETIME; 5962 goto err_ref1; 5963 } 5964 i915_request_put(rq); 5965 5966 result[0] = create_user_vma(A->vm, SZ_64K); 5967 if (IS_ERR(result[0])) { 5968 err = PTR_ERR(result[0]); 5969 goto err_ref1; 5970 } 5971 5972 result[1] = create_user_vma(A->vm, SZ_64K); 5973 if (IS_ERR(result[1])) { 5974 err = PTR_ERR(result[1]); 5975 goto err_result0; 5976 } 5977 5978 rq = record_registers(A, result[0], result[1], sema); 5979 if (IS_ERR(rq)) { 5980 err = PTR_ERR(rq); 5981 goto err_result1; 5982 } 5983 5984 err = poison_registers(B, poison, sema); 5985 if (err) { 5986 WRITE_ONCE(*sema, -1); 5987 i915_request_put(rq); 5988 goto err_result1; 5989 } 5990 5991 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 5992 i915_request_put(rq); 5993 err = -ETIME; 5994 goto err_result1; 5995 } 5996 i915_request_put(rq); 5997 5998 err = compare_isolation(engine, ref, result, A, poison); 5999 6000 err_result1: 6001 i915_vma_put(result[1]); 6002 err_result0: 6003 i915_vma_put(result[0]); 6004 err_ref1: 6005 i915_vma_put(ref[1]); 6006 err_ref0: 6007 i915_vma_put(ref[0]); 6008 err_B: 6009 intel_context_put(B); 6010 err_A: 6011 intel_context_put(A); 6012 return err; 6013 } 6014 6015 static bool skip_isolation(const struct intel_engine_cs *engine) 6016 { 6017 if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9) 6018 return true; 6019 6020 if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11) 6021 return true; 6022 6023 return false; 6024 } 6025 6026 static int live_lrc_isolation(void *arg) 6027 { 6028 struct intel_gt *gt = arg; 6029 struct intel_engine_cs *engine; 6030 enum intel_engine_id id; 6031 const u32 poison[] = { 
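		/* each pattern below is also applied inverted (~poison) */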
6032 STACK_MAGIC,
6033 0x3a3a3a3a,
6034 0x5c5c5c5c,
6035 0xffffffff,
6036 0xffff0000,
6037 };
6038 int err = 0;
6039
6040 /*
6041 * Our goal is to try to verify that per-context state cannot be
6042 * tampered with by another non-privileged client.
6043 *
6044 * We take the list of context registers from the LRI in the default
6045 * context image and attempt to modify that list from a remote context.
6046 */
6047
6048 for_each_engine(engine, gt, id) {
6049 int i;
6050
6051 /* Just don't even ask */
6052 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
6053 skip_isolation(engine))
6054 continue;
6055
6056 intel_engine_pm_get(engine);
6057 for (i = 0; i < ARRAY_SIZE(poison); i++) {
6058 int result;
6059
6060 result = __lrc_isolation(engine, poison[i]);
6061 if (result && !err)
6062 err = result;
6063
6064 result = __lrc_isolation(engine, ~poison[i]);
6065 if (result && !err)
6066 err = result;
6067 }
6068 intel_engine_pm_put(engine);
6069 if (igt_flush_test(gt->i915)) {
6070 err = -EIO;
6071 break;
6072 }
6073 }
6074
6075 return err;
6076 }
6077
6078 static int indirect_ctx_submit_req(struct intel_context *ce)
6079 {
6080 struct i915_request *rq;
6081 int err = 0;
6082
6083 rq = intel_context_create_request(ce);
6084 if (IS_ERR(rq))
6085 return PTR_ERR(rq);
6086
6087 i915_request_get(rq);
6088 i915_request_add(rq);
6089
6090 if (i915_request_wait(rq, 0, HZ / 5) < 0)
6091 err = -ETIME;
6092
6093 i915_request_put(rq);
6094
6095 return err;
6096 }
6097
6098 #define CTX_BB_CANARY_OFFSET (3 * 1024)
6099 #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
6100
6101 static u32 *
6102 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
6103 {
6104 *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
6105 MI_SRM_LRM_GLOBAL_GTT |
6106 MI_LRI_LRM_CS_MMIO;
6107 *cs++ = i915_mmio_reg_offset(RING_START(0));
6108 *cs++ = i915_ggtt_offset(ce->state) +
6109 context_wa_bb_offset(ce) +
6110 CTX_BB_CANARY_OFFSET;
6111 *cs++ = 0;
6112
6113 return cs;
6114 }
6115
6116 static void
6117 indirect_ctx_bb_setup(struct intel_context *ce)
6118 {
6119 u32 *cs = context_indirect_bb(ce);
6120
6121 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
6122
6123 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
6124 }
6125
6126 static bool check_ring_start(struct intel_context *ce)
6127 {
6128 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
6129 LRC_STATE_OFFSET + context_wa_bb_offset(ce);
6130
6131 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
6132 return true;
6133
6134 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
6135 ctx_bb[CTX_BB_CANARY_INDEX],
6136 ce->lrc_reg_state[CTX_RING_START]);
6137
6138 return false;
6139 }
6140
6141 static int indirect_ctx_bb_check(struct intel_context *ce)
6142 {
6143 int err;
6144
6145 err = indirect_ctx_submit_req(ce);
6146 if (err)
6147 return err;
6148
6149 if (!check_ring_start(ce))
6150 return -EINVAL;
6151
6152 return 0;
6153 }
6154
6155 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
6156 {
6157 struct intel_context *a, *b;
6158 int err;
6159
6160 a = intel_context_create(engine);
6161 if (IS_ERR(a))
6162 return PTR_ERR(a);
6163 err = intel_context_pin(a);
6164 if (err)
6165 goto put_a;
6166
6167 b = intel_context_create(engine);
6168 if (IS_ERR(b)) {
6169 err = PTR_ERR(b);
6170 goto unpin_a;
6171 }
6172 err = intel_context_pin(b);
6173 if (err)
6174 goto put_b;
6175
6176 /* We use the already reserved extra page in context state */
6177 if (!a->wa_bb_page) {
6178 GEM_BUG_ON(b->wa_bb_page);
6179
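		/* the reserved per-context wa_bb page must always be present on Gen12 */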
GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
6180 goto unpin_b;
6181 }
6182
6183 /*
6184 * In order to test that our per-context bb is truly per context, and
6185 * that it executes at the intended point of the context restore, make
6186 * the batch store the ring start value to memory.
6187 * As the ring start is restored before the indirect ctx bb runs, and as
6188 * it differs for each context, it is well suited for this purpose.
6189 */
6190 indirect_ctx_bb_setup(a);
6191 indirect_ctx_bb_setup(b);
6192
6193 err = indirect_ctx_bb_check(a);
6194 if (err)
6195 goto unpin_b;
6196
6197 err = indirect_ctx_bb_check(b);
6198
6199 unpin_b:
6200 intel_context_unpin(b);
6201 put_b:
6202 intel_context_put(b);
6203 unpin_a:
6204 intel_context_unpin(a);
6205 put_a:
6206 intel_context_put(a);
6207
6208 return err;
6209 }
6210
6211 static int live_lrc_indirect_ctx_bb(void *arg)
6212 {
6213 struct intel_gt *gt = arg;
6214 struct intel_engine_cs *engine;
6215 enum intel_engine_id id;
6216 int err = 0;
6217
6218 for_each_engine(engine, gt, id) {
6219 intel_engine_pm_get(engine);
6220 err = __live_lrc_indirect_ctx_bb(engine);
6221 intel_engine_pm_put(engine);
6222
6223 if (igt_flush_test(gt->i915))
6224 err = -EIO;
6225
6226 if (err)
6227 break;
6228 }
6229
6230 return err;
6231 }
6232
6233 static void garbage_reset(struct intel_engine_cs *engine,
6234 struct i915_request *rq)
6235 {
6236 const unsigned int bit = I915_RESET_ENGINE + engine->id;
6237 unsigned long *lock = &engine->gt->reset.flags;
6238
6239 if (test_and_set_bit(bit, lock))
6240 return;
6241
6242 tasklet_disable(&engine->execlists.tasklet);
6243
6244 if (!rq->fence.error)
6245 intel_engine_reset(engine, NULL);
6246
6247 tasklet_enable(&engine->execlists.tasklet);
6248 clear_and_wake_up_bit(bit, lock);
6249 }
6250
6251 static struct i915_request *garbage(struct intel_context *ce,
6252 struct rnd_state *prng)
6253 {
6254 struct i915_request *rq;
6255 int err;
6256
6257 err = intel_context_pin(ce);
6258 if (err)
6259 return ERR_PTR(err);
6260
6261 prandom_bytes_state(prng,
6262 ce->lrc_reg_state,
6263 ce->engine->context_size -
6264 LRC_STATE_OFFSET);
6265
6266 rq = intel_context_create_request(ce);
6267 if (IS_ERR(rq)) {
6268 err = PTR_ERR(rq);
6269 goto err_unpin;
6270 }
6271
6272 i915_request_get(rq);
6273 i915_request_add(rq);
6274 return rq;
6275
6276 err_unpin:
6277 intel_context_unpin(ce);
6278 return ERR_PTR(err);
6279 }
6280
6281 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
6282 {
6283 struct intel_context *ce;
6284 struct i915_request *hang;
6285 int err = 0;
6286
6287 ce = intel_context_create(engine);
6288 if (IS_ERR(ce))
6289 return PTR_ERR(ce);
6290
6291 hang = garbage(ce, prng);
6292 if (IS_ERR(hang)) {
6293 err = PTR_ERR(hang);
6294 goto err_ce;
6295 }
6296
6297 if (wait_for_submit(engine, hang, HZ / 2)) {
6298 i915_request_put(hang);
6299 err = -ETIME;
6300 goto err_ce;
6301 }
6302
6303 intel_context_set_banned(ce);
6304 garbage_reset(engine, hang);
6305
6306 intel_engine_flush_submission(engine);
6307 if (!hang->fence.error) {
6308 i915_request_put(hang);
6309 pr_err("%s: corrupted context was not reset\n",
6310 engine->name);
6311 err = -EINVAL;
6312 goto err_ce;
6313 }
6314
6315 if (i915_request_wait(hang, 0, HZ / 2) < 0) {
6316 pr_err("%s: corrupted context did not recover\n",
6317 engine->name);
6318 i915_request_put(hang);
6319 err = -EIO;
6320 goto err_ce;
6321 }
6322 i915_request_put(hang);
6323
6324 err_ce:
6325 intel_context_put(ce);
6326 return err;
6327 }
6328
6329 static int
live_lrc_garbage(void *arg) 6330 { 6331 struct intel_gt *gt = arg; 6332 struct intel_engine_cs *engine; 6333 enum intel_engine_id id; 6334 6335 /* 6336 * Verify that we can recover if one context state is completely 6337 * corrupted. 6338 */ 6339 6340 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN)) 6341 return 0; 6342 6343 for_each_engine(engine, gt, id) { 6344 I915_RND_STATE(prng); 6345 int err = 0, i; 6346 6347 if (!intel_has_reset_engine(engine->gt)) 6348 continue; 6349 6350 intel_engine_pm_get(engine); 6351 for (i = 0; i < 3; i++) { 6352 err = __lrc_garbage(engine, &prng); 6353 if (err) 6354 break; 6355 } 6356 intel_engine_pm_put(engine); 6357 6358 if (igt_flush_test(gt->i915)) 6359 err = -EIO; 6360 if (err) 6361 return err; 6362 } 6363 6364 return 0; 6365 } 6366 6367 static int __live_pphwsp_runtime(struct intel_engine_cs *engine) 6368 { 6369 struct intel_context *ce; 6370 struct i915_request *rq; 6371 IGT_TIMEOUT(end_time); 6372 int err; 6373 6374 ce = intel_context_create(engine); 6375 if (IS_ERR(ce)) 6376 return PTR_ERR(ce); 6377 6378 ce->runtime.num_underflow = 0; 6379 ce->runtime.max_underflow = 0; 6380 6381 do { 6382 unsigned int loop = 1024; 6383 6384 while (loop) { 6385 rq = intel_context_create_request(ce); 6386 if (IS_ERR(rq)) { 6387 err = PTR_ERR(rq); 6388 goto err_rq; 6389 } 6390 6391 if (--loop == 0) 6392 i915_request_get(rq); 6393 6394 i915_request_add(rq); 6395 } 6396 6397 if (__igt_timeout(end_time, NULL)) 6398 break; 6399 6400 i915_request_put(rq); 6401 } while (1); 6402 6403 err = i915_request_wait(rq, 0, HZ / 5); 6404 if (err < 0) { 6405 pr_err("%s: request not completed!\n", engine->name); 6406 goto err_wait; 6407 } 6408 6409 igt_flush_test(engine->i915); 6410 6411 pr_info("%s: pphwsp runtime %lluns, average %lluns\n", 6412 engine->name, 6413 intel_context_get_total_runtime_ns(ce), 6414 intel_context_get_avg_runtime_ns(ce)); 6415 6416 err = 0; 6417 if (ce->runtime.num_underflow) { 6418 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", 6419 engine->name, 6420 ce->runtime.num_underflow, 6421 ce->runtime.max_underflow); 6422 GEM_TRACE_DUMP(); 6423 err = -EOVERFLOW; 6424 } 6425 6426 err_wait: 6427 i915_request_put(rq); 6428 err_rq: 6429 intel_context_put(ce); 6430 return err; 6431 } 6432 6433 static int live_pphwsp_runtime(void *arg) 6434 { 6435 struct intel_gt *gt = arg; 6436 struct intel_engine_cs *engine; 6437 enum intel_engine_id id; 6438 int err = 0; 6439 6440 /* 6441 * Check that cumulative context runtime as stored in the pphwsp[16] 6442 * is monotonic. 6443 */ 6444 6445 for_each_engine(engine, gt, id) { 6446 err = __live_pphwsp_runtime(engine); 6447 if (err) 6448 break; 6449 } 6450 6451 if (igt_flush_test(gt->i915)) 6452 err = -EIO; 6453 6454 return err; 6455 } 6456 6457 int intel_lrc_live_selftests(struct drm_i915_private *i915) 6458 { 6459 static const struct i915_subtest tests[] = { 6460 SUBTEST(live_lrc_layout), 6461 SUBTEST(live_lrc_fixed), 6462 SUBTEST(live_lrc_state), 6463 SUBTEST(live_lrc_gpr), 6464 SUBTEST(live_lrc_isolation), 6465 SUBTEST(live_lrc_timestamp), 6466 SUBTEST(live_lrc_garbage), 6467 SUBTEST(live_pphwsp_runtime), 6468 SUBTEST(live_lrc_indirect_ctx_bb), 6469 }; 6470 6471 if (!HAS_LOGICAL_RING_CONTEXTS(i915)) 6472 return 0; 6473 6474 return intel_gt_live_subtests(tests, &i915->gt); 6475 } 6476