/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>
#include <linux/string_helpers.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"

#include "gem/selftests/igt_gem_utils.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"

#include "huge_gem_object.h"
#include "igt_gem_utils.h"

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	struct igt_live_test t;
	struct file *file;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as a very simple stress test for submission and HW
	 * switching between contexts.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_file;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = live_context(i915, file);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_ctx;
		}
	}

	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq = NULL;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			struct i915_request *this;

			this = igt_request_alloc(ctx[n], engine);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_ctx;
			}
			if (rq) {
				i915_request_await_dma_fence(this, &rq->fence);
				i915_request_put(rq);
			}
			rq = i915_request_get(this);
			i915_request_add(this);
		}
		if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
			intel_gt_set_wedged(engine->gt);
			i915_request_put(rq);
			err = -EIO;
			goto out_ctx;
		}
		i915_request_put(rq);

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_ctx;

		end_time = jiffies + i915_selftest.timeout_jiffies;
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			rq = NULL;
			for (n = 0; n < prime; n++) {
				struct i915_request *this;

				this = igt_request_alloc(ctx[n % nctx], engine);
				if (IS_ERR(this)) {
					err = PTR_ERR(this);
					goto out_ctx;
				}

				if (rq) { /* Force submission order */
					i915_request_await_dma_fence(this, &rq->fence);
					i915_request_put(rq);
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				rq = i915_request_get(this);
				i915_request_add(this);
			}
			GEM_BUG_ON(!rq);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %ld contexts timed out\n",
				       prime);
				intel_gt_set_wedged(engine->gt);
				i915_request_put(rq);
				break;
			}
			i915_request_put(rq);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_ctx;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_ctx:
	kfree(ctx);
out_file:
	fput(file);
	return err;
}

struct parallel_switch {
	struct kthread_worker *worker;
	struct kthread_work work;
	struct intel_context *ce[2];
	int result;
};

static void __live_parallel_switch1(struct kthread_work *work)
{
	struct parallel_switch *arg =
		container_of(work, typeof(*arg), work);
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	arg->result = 0;
	do {
		struct i915_request *rq = NULL;
		int n;

		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				arg->result = PTR_ERR(rq);
				break;
			}

			i915_request_get(rq);
			if (prev) {
				arg->result =
					i915_request_await_dma_fence(rq,
								     &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}

		if (IS_ERR_OR_NULL(rq))
			break;

		if (i915_request_wait(rq, 0, HZ) < 0)
			arg->result = -ETIME;

		i915_request_put(rq);

		count++;
	} while (!arg->result && !__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (sync) <%d>\n",
		arg->ce[0]->engine->name, count, arg->result);
}

static void __live_parallel_switchN(struct kthread_work *work)
{
	struct parallel_switch *arg =
		container_of(work, typeof(*arg), work);
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int n;

	count = 0;
	arg->result = 0;
	do {
		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				arg->result = PTR_ERR(rq);
				break;
			}

			i915_request_get(rq);
			if (prev) {
				arg->result =
					i915_request_await_dma_fence(rq,
								     &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}

		count++;
	} while (!arg->result && !__igt_timeout(end_time, NULL));

	if (!IS_ERR_OR_NULL(rq))
		i915_request_put(rq);

	pr_info("%s: %lu switches (many) <%d>\n",
		arg->ce[0]->engine->name, count, arg->result);
}

static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static void (* const func[])(struct kthread_work *) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	void (* const *fn)(struct kthread_work *);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	for (n = 0; n < count; n++) {
		struct kthread_worker *worker;

		if (!data[n].ce[0])
			continue;

		worker = kthread_create_worker(0, "igt/parallel:%s",
					       data[n].ce[0]->engine->name);
		if (IS_ERR(worker)) {
			err = PTR_ERR(worker);
			goto out;
		}

		data[n].worker = worker;
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].result = 0;
			kthread_init_work(&data[n].work, *fn);
			kthread_queue_work(data[n].worker, &data[n].work);
		}

		for (n = 0; n < count; n++) {
			if (data[n].ce[0]) {
				kthread_flush_work(&data[n].work);
				if (data[n].result && !err)
					err = data[n].result;
			}
		}

		if (igt_live_test_end(&t)) {
			err = err ?: -EIO;
			break;
		}
	}

out:
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}

		if (data[n].worker)
			kthread_destroy_worker(data[n].worker);
	}
	kfree(data);
out_file:
	fput(file);
	return err;
}

static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}

static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}

static int gpu_fill(struct intel_context *ce,
		    struct drm_i915_gem_object *obj,
		    unsigned int dw)
{
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(obj->base.size > ce->vm->total);
	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
	if (err)
		return err;

	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
	 */
	err = igt_gpu_fill_dw(ce, vma,
			      (dw * real_page_count(obj)) << PAGE_SHIFT |
			      (dw * sizeof(u32)),
			      real_page_count(obj),
			      dw);
	i915_vma_unpin(vma);

	return err;
}

static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
	unsigned int need_flush;
	unsigned long n, m;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_write(obj, &need_flush);
	if (err)
		goto out;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_local_page(i915_gem_object_get_page(obj, n));
		for (m = 0; m < DW_PER_PAGE; m++)
			map[m] = value;
		if (!has_llc)
			drm_clflush_virt_range(map, PAGE_SIZE);
		kunmap_local(map);
	}

	i915_gem_object_finish_access(obj);
	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
	obj->write_domain = 0;
out:
	i915_gem_object_unlock(obj);
	return err;
}

static noinline int cpu_check(struct drm_i915_gem_object *obj,
			      unsigned int idx, unsigned int max)
{
	unsigned int needs_flush;
	unsigned long n;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		goto out_unlock;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map, m;

		map = kmap_local_page(i915_gem_object_get_page(obj, n));
		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(map, PAGE_SIZE);

		for (m = 0; m < max; m++) {
			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %ld/%ld, offset %d/%d: found %x expected %x\n",
				       __builtin_return_address(0), idx,
				       n, real_page_count(obj), m, max,
				       map[m], m);
				err = -EINVAL;
				goto out_unmap;
			}
		}

		for (; m < DW_PER_PAGE; m++) {
			if (map[m] != STACK_MAGIC) {
				pr_err("%pS: Invalid value at object %d page %ld, offset %d: found %x expected %x (uninitialised)\n",
				       __builtin_return_address(0), idx, n, m,
				       map[m], STACK_MAGIC);
				err = -EINVAL;
				goto out_unmap;
			}
		}

out_unmap:
		kunmap_local(map);
		if (err)
			break;
	}

	i915_gem_object_finish_access(obj);
out_unlock:
	i915_gem_object_unlock(obj);
	return err;
}

static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
{
	int err;

	GEM_BUG_ON(obj->base.handle_count);

	/* tie the object to the drm_file for easy reaping */
	err = idr_alloc(&to_drm_file(file)->object_idr,
			&obj->base, 1, 0, GFP_KERNEL);
	if (err < 0)
		return err;

	i915_gem_object_get(obj);
	obj->base.handle_count++;
	return 0;
}

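/*
 * Back each test object with a huge_gem_object: a small set of real
 * pages aliased across a much larger GTT range. The object is tied to
 * the drm_file for reaping and pre-filled with STACK_MAGIC so that
 * cpu_check() can spot dwords the GPU never wrote.
 */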
static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
		   struct file *file,
		   struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	u64 size;
	int err;

	/* Keep in GEM's good graces */
	intel_gt_retire_requests(vm->gt);

	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);

	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
	if (IS_ERR(obj))
		return obj;

	err = file_add_object(file, obj);
	i915_gem_object_put(obj);
	if (err)
		return ERR_PTR(err);

	err = cpu_fill(obj, STACK_MAGIC);
	if (err) {
		pr_err("Failed to fill object with cpu, err=%d\n",
		       err);
		return ERR_PTR(err);
	}

	list_add_tail(&obj->st_link, objects);
	return obj;
}

static unsigned long max_dwords(struct drm_i915_gem_object *obj)
{
	unsigned long npages = fake_page_count(obj);

	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
	return npages / DW_PER_PAGE;
}

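/*
 * Rate limiting for the loops below: throttle() keeps at most 'count'
 * requests outstanding on a context, waiting for the oldest before
 * queuing a new one; throttle_release() drops whatever is still queued.
 */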
static void throttle_release(struct i915_request **q, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (IS_ERR_OR_NULL(q[i]))
			continue;

		i915_request_put(fetch_and_zero(&q[i]));
	}
}

static int throttle(struct intel_context *ce,
		    struct i915_request **q, int count)
{
	int i;

	if (!IS_ERR_OR_NULL(q[0])) {
		if (i915_request_wait(q[0],
				      I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0)
			return -EINTR;

		i915_request_put(q[0]);
	}

	for (i = 0; i < count - 1; i++)
		q[i] = q[i + 1];

	q[i] = intel_context_create_request(ce);
	if (IS_ERR(q[i]))
		return PTR_ERR(q[i]);

	i915_request_get(q[i]);
	i915_request_add(q[i]);

	return 0;
}

static int igt_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	int err = -ENODEV;

	/*
	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	for_each_uabi_engine(engine, i915) {
		struct drm_i915_gem_object *obj = NULL;
		unsigned long ncontexts, ndwords, dw;
		struct i915_request *tq[5] = {};
		struct igt_live_test t;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);
		struct file *file;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (!engine->context_size)
			continue; /* No logical context support in HW */

		file = mock_file(i915);
		if (IS_ERR(file))
			return PTR_ERR(file);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		ncontexts = 0;
		ndwords = 0;
		dw = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, NULL);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_file;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_file;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}

		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				break;

			dw += rem;
		}

out_file:
		throttle_release(tq, ARRAY_SIZE(tq));
		if (igt_live_test_end(&t))
			err = -EIO;

		fput(file);
		if (err)
			return err;

		i915_gem_drain_freed_objects(i915);
	}

	return 0;
}

static int igt_shared_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *tq[5] = {};
	struct i915_gem_context *parent;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct file *file;
	int err = 0;

	/*
	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */
	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	parent = live_context(i915, file);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		goto out_file;
	}

	if (!parent->vm) { /* not full-ppgtt; nothing to share */
		err = 0;
		goto out_file;
	}

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	for_each_uabi_engine(engine, i915) {
		unsigned long ncontexts, ndwords, dw;
		struct drm_i915_gem_object *obj = NULL;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		dw = 0;
		ndwords = 0;
		ncontexts = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, parent->vm);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_test;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(parent->vm,
							 file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_test;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}
		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				goto out_test;

			dw += rem;
		}

		i915_gem_drain_freed_objects(i915);
	}
out_test:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;
out_file:
	fput(file);
	return err;
}

static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
			    struct i915_vma *vma,
			    struct intel_engine_cs *engine)
{
	u32 *cmd;

	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);

	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
	*cmd++ = lower_32_bits(i915_vma_offset(vma));
	*cmd++ = upper_32_bits(i915_vma_offset(vma));
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(rpcs, 0, 64);
	i915_gem_object_unpin_map(rpcs);

	intel_gt_chipset_flush(vma->vm->gt);

	return 0;
}

static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
		struct intel_context *ce,
		struct i915_request **rq_out)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	struct i915_vma *batch;
	struct i915_vma *vma;
	struct drm_i915_gem_object *rpcs;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	if (GRAPHICS_VER(i915) < 8)
		return -EINVAL;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(rpcs))
		return PTR_ERR(rpcs);

	batch = i915_vma_instance(rpcs, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_put;
	}

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_gem_object_lock(rpcs, &ww);
	if (!err)
		err = i915_gem_object_set_to_gtt_domain(obj, false);
	if (!err)
		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_put;

	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_vma;

	err = rpcs_query_batch(rpcs, vma, ce->engine);
	if (err)
		goto err_batch;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = i915_vma_move_to_active(batch, rq, 0);
	if (err)
		goto skip_request;

	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = rq->engine->emit_bb_start(rq,
					i915_vma_offset(batch),
					i915_vma_size(batch),
					0);
	if (err)
		goto skip_request;

	*rq_out = i915_request_get(rq);

skip_request:
	if (err)
		i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_batch:
	i915_vma_unpin(batch);
err_vma:
	i915_vma_unpin(vma);
err_put:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	i915_gem_object_put(rpcs);
	return err;
}

#define TEST_IDLE	BIT(0)
#define TEST_BUSY	BIT(1)
#define TEST_RESET	BIT(2)

static int
__sseu_prepare(const char *name,
	       unsigned int flags,
	       struct intel_context *ce,
	       struct igt_spinner **spin)
{
	struct i915_request *rq;
	int ret;

	*spin = NULL;
	if (!(flags & (TEST_BUSY | TEST_RESET)))
		return 0;

	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
	if (!*spin)
		return -ENOMEM;

	ret = igt_spinner_init(*spin, ce->engine->gt);
	if (ret)
		goto err_free;

	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_add(rq);

	if (!igt_wait_for_spinner(*spin, rq)) {
		pr_err("%s: Spinner failed to start!\n", name);
		ret = -ETIMEDOUT;
		goto err_end;
	}

	return 0;

err_end:
	igt_spinner_end(*spin);
err_fini:
	igt_spinner_fini(*spin);
err_free:
	kfree(fetch_and_zero(spin));
	return ret;
}

static int
__read_slice_count(struct intel_context *ce,
		   struct drm_i915_gem_object *obj,
		   struct igt_spinner *spin,
		   u32 *rpcs)
{
	struct i915_request *rq = NULL;
	u32 s_mask, s_shift;
	unsigned int cnt;
	u32 *buf, val;
	long ret;

	ret = emit_rpcs_query(obj, ce, &rq);
	if (ret)
		return ret;

	if (spin)
		igt_spinner_end(spin);

	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
	i915_request_put(rq);
	if (ret < 0)
		return ret;

	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		return ret;
	}

	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
		s_mask = GEN11_RPCS_S_CNT_MASK;
		s_shift = GEN11_RPCS_S_CNT_SHIFT;
	} else {
		s_mask = GEN8_RPCS_S_CNT_MASK;
		s_shift = GEN8_RPCS_S_CNT_SHIFT;
	}

	val = *buf;
	cnt = (val & s_mask) >> s_shift;
	*rpcs = val;

	i915_gem_object_unpin_map(obj);

	return cnt;
}

static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
	     const char *prefix, const char *suffix)
{
	if (slices == expected)
		return 0;

	if (slices < 0) {
		pr_err("%s: %s read slice count failed with %d%s\n",
		       name, prefix, slices, suffix);
		return slices;
	}

	pr_err("%s: %s slice count %d is not %u%s\n",
	       name, prefix, slices, expected, suffix);

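	/* Dump RPCS as <slices>x<subslices>; '*' marks a set count-enable bit. */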
"*" : ""); 1157 1158 return -EINVAL; 1159 } 1160 1161 static int 1162 __sseu_finish(const char *name, 1163 unsigned int flags, 1164 struct intel_context *ce, 1165 struct drm_i915_gem_object *obj, 1166 unsigned int expected, 1167 struct igt_spinner *spin) 1168 { 1169 unsigned int slices = hweight32(ce->engine->sseu.slice_mask); 1170 u32 rpcs = 0; 1171 int ret = 0; 1172 1173 if (flags & TEST_RESET) { 1174 ret = intel_engine_reset(ce->engine, "sseu"); 1175 if (ret) 1176 goto out; 1177 } 1178 1179 ret = __read_slice_count(ce, obj, 1180 flags & TEST_RESET ? NULL : spin, &rpcs); 1181 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); 1182 if (ret) 1183 goto out; 1184 1185 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); 1186 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); 1187 1188 out: 1189 if (spin) 1190 igt_spinner_end(spin); 1191 1192 if ((flags & TEST_IDLE) && ret == 0) { 1193 ret = igt_flush_test(ce->engine->i915); 1194 if (ret) 1195 return ret; 1196 1197 ret = __read_slice_count(ce, obj, NULL, &rpcs); 1198 ret = __check_rpcs(name, rpcs, ret, expected, 1199 "Context", " after idle!"); 1200 } 1201 1202 return ret; 1203 } 1204 1205 static int 1206 __sseu_test(const char *name, 1207 unsigned int flags, 1208 struct intel_context *ce, 1209 struct drm_i915_gem_object *obj, 1210 struct intel_sseu sseu) 1211 { 1212 struct igt_spinner *spin = NULL; 1213 int ret; 1214 1215 intel_engine_pm_get(ce->engine); 1216 1217 ret = __sseu_prepare(name, flags, ce, &spin); 1218 if (ret) 1219 goto out_pm; 1220 1221 ret = intel_context_reconfigure_sseu(ce, sseu); 1222 if (ret) 1223 goto out_spin; 1224 1225 ret = __sseu_finish(name, flags, ce, obj, 1226 hweight32(sseu.slice_mask), spin); 1227 1228 out_spin: 1229 if (spin) { 1230 igt_spinner_end(spin); 1231 igt_spinner_fini(spin); 1232 kfree(spin); 1233 } 1234 out_pm: 1235 intel_engine_pm_put(ce->engine); 1236 return ret; 1237 } 1238 1239 static int 1240 __igt_ctx_sseu(struct drm_i915_private *i915, 1241 const char *name, 1242 unsigned int flags) 1243 { 1244 struct drm_i915_gem_object *obj; 1245 int inst = 0; 1246 int ret = 0; 1247 1248 if (GRAPHICS_VER(i915) < 9) 1249 return 0; 1250 1251 if (flags & TEST_RESET) 1252 igt_global_reset_lock(to_gt(i915)); 1253 1254 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 1255 if (IS_ERR(obj)) { 1256 ret = PTR_ERR(obj); 1257 goto out_unlock; 1258 } 1259 1260 do { 1261 struct intel_engine_cs *engine; 1262 struct intel_context *ce; 1263 struct intel_sseu pg_sseu; 1264 1265 engine = intel_engine_lookup_user(i915, 1266 I915_ENGINE_CLASS_RENDER, 1267 inst++); 1268 if (!engine) 1269 break; 1270 1271 if (hweight32(engine->sseu.slice_mask) < 2) 1272 continue; 1273 1274 if (!engine->gt->info.sseu.has_slice_pg) 1275 continue; 1276 1277 /* 1278 * Gen11 VME friendly power-gated configuration with 1279 * half enabled sub-slices. 1280 */ 1281 pg_sseu = engine->sseu; 1282 pg_sseu.slice_mask = 1; 1283 pg_sseu.subslice_mask = 1284 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); 1285 1286 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", 1287 engine->name, name, flags, 1288 hweight32(engine->sseu.slice_mask), 1289 hweight32(pg_sseu.slice_mask)); 1290 1291 ce = intel_context_create(engine); 1292 if (IS_ERR(ce)) { 1293 ret = PTR_ERR(ce); 1294 goto out_put; 1295 } 1296 1297 ret = intel_context_pin(ce); 1298 if (ret) 1299 goto out_ce; 1300 1301 /* First set the default mask. 
		/* First set the default mask. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* Then set a power-gated configuration. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

		/* Back to defaults. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* One last power-gated configuration for the road. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

out_unpin:
		intel_context_unpin(ce);
out_ce:
		intel_context_put(ce);
	} while (!ret);

	if (igt_flush_test(i915))
		ret = -EIO;

out_put:
	i915_gem_object_put(obj);

out_unlock:
	if (flags & TEST_RESET)
		igt_global_reset_unlock(to_gt(i915));

	if (ret)
		pr_err("%s: Failed with %d!\n", name, ret);

	return ret;
}

static int igt_ctx_sseu(void *arg)
{
	struct {
		const char *name;
		unsigned int flags;
	} *phase, phases[] = {
		{ .name = "basic", .flags = 0 },
		{ .name = "idle", .flags = TEST_IDLE },
		{ .name = "busy", .flags = TEST_BUSY },
		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
	     i++, phase++)
		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);

	return ret;
}

static int igt_ctx_readonly(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned long idx, ndwords, dw, num_engines;
	struct drm_i915_gem_object *obj = NULL;
	struct i915_request *tq[5] = {};
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	LIST_HEAD(objects);
	struct file *file;
	int err = -ENODEV;

	/*
	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
	 * any write to a read-only object.
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
	if (!vm || !vm->has_read_only) {
		err = 0;
		goto out_file;
	}

	num_engines = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
		if (intel_engine_can_store_dword(ce->engine))
			num_engines++;
	i915_gem_context_unlock_engines(ctx);

	ndwords = 0;
	dw = 0;
	while (!time_after(jiffies, end_time)) {
		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx), it) {
			if (!intel_engine_can_store_dword(ce->engine))
				continue;

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					i915_gem_context_unlock_engines(ctx);
					goto out_file;
				}

				if (prandom_u32_state(&prng) & 1)
					i915_gem_object_set_readonly(obj);
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       ce->engine->name,
				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}
			ndwords++;
		}
		i915_gem_context_unlock_engines(ctx);
	}
	pr_info("Submitted %lu dwords (across %lu engines)\n",
		ndwords, num_engines);

	dw = 0;
	idx = 0;
	list_for_each_entry(obj, &objects, st_link) {
		unsigned int rem =
			min_t(unsigned int, ndwords - dw, max_dwords(obj));
		unsigned int num_writes;

		num_writes = rem;
		if (i915_gem_object_is_readonly(obj))
			num_writes = 0;

		err = cpu_check(obj, idx++, num_writes);
		if (err)
			break;

		dw += rem;
	}

out_file:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;

	fput(file);
	return err;
}

static int check_scratch(struct i915_address_space *vm, u64 offset)
{
	struct drm_mm_node *node;

	mutex_lock(&vm->mutex);
	node = __drm_mm_interval_first(&vm->mm,
				       offset, offset + sizeof(u32) - 1);
	mutex_unlock(&vm->mutex);
	if (!node || node->start > offset)
		return 0;

	GEM_BUG_ON(offset >= node->start + node->size);

	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
	       upper_32_bits(offset), lower_32_bits(offset));
	return -EINVAL;
}

static int write_to_scratch(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    struct drm_i915_gem_object *obj,
			    u64 offset, u32 value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	if (GRAPHICS_VER(i915) >= 8) {
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
	} else {
		*cmd++ = 0;
		*cmd++ = offset;
	}
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;
	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_eb_vm(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto out_vm;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = igt_vma_move_to_active_unlocked(vma, rq, 0);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, i915_vma_offset(vma),
				    i915_vma_size(vma), 0);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);

	if (!err)
		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);

	return err;
}

static int read_from_scratch(struct i915_gem_context *ctx,
			     struct intel_engine_cs *engine,
			     struct drm_i915_gem_object *obj,
			     u64 offset, u32 *value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm;
	const u32 result = 0x100;
	struct i915_request *rq;
	struct i915_vma *vma;
	unsigned int flags;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	if (GRAPHICS_VER(i915) >= 8) {
		const u32 GPR0 = engine->mmio_base + 0x600;

		vm = i915_gem_context_get_eb_vm(ctx);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto err_unpin;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = result;
		*cmd++ = 0;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = 0;
	} else {
		const u32 reg = engine->mmio_base + 0x420;

		/* hsw: register access even to 3DPRIM! is protected */
		vm = i915_vm_get(&engine->gt->ggtt->vm);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto err_unpin;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM;
		*cmd++ = reg;
		*cmd++ = offset;
		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
		*cmd++ = reg;
		*cmd++ = i915_vma_offset(vma) + result;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = I915_DISPATCH_SECURE;
	}

	intel_gt_chipset_flush(engine->gt);

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, i915_vma_offset(vma),
				    i915_vma_size(vma), flags);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_set_to_cpu_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		goto out_vm;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_vm;
	}

	*value = cmd[result / sizeof(*cmd)];
	i915_gem_object_unpin_map(obj);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);

	if (!err)
		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);

	return err;
}

static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
{
	struct i915_address_space *vm;
	u32 *vaddr;
	int err = 0;

	vm = ctx->vm;
	if (!vm)
		return -ENODEV;

	if (!vm->scratch[0]) {
		pr_err("No scratch page!\n");
		return -EINVAL;
	}

	vaddr = __px_vaddr(vm->scratch[0]);

	memcpy(out, vaddr, sizeof(*out));
	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
		pr_err("Inconsistent initial state of scratch page!\n");
		err = -EINVAL;
	}

	return err;
}

static int igt_vm_isolation(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx_a, *ctx_b;
	struct drm_i915_gem_object *obj_a, *obj_b;
	unsigned long num_engines, count;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	struct file *file;
	u64 vm_total;
	u32 expected;
	int err;

	if (GRAPHICS_VER(i915) < 7)
		return 0;

	/*
	 * The simple goal here is that a write into one context is not
	 * observed in a second (separate page tables and scratch).
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx_a = live_context(i915, file);
	if (IS_ERR(ctx_a)) {
		err = PTR_ERR(ctx_a);
		goto out_file;
	}

	ctx_b = live_context(i915, file);
	if (IS_ERR(ctx_b)) {
		err = PTR_ERR(ctx_b);
		goto out_file;
	}

	/* We can only test vm isolation if the vms are distinct */
	if (ctx_a->vm == ctx_b->vm)
		goto out_file;

	/* Read the initial state of the scratch page */
	err = check_scratch_page(ctx_a, &expected);
	if (err)
		goto out_file;

	err = check_scratch_page(ctx_b, &expected);
	if (err)
		goto out_file;

	vm_total = ctx_a->vm->total;
	GEM_BUG_ON(ctx_b->vm->total != vm_total);

	obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj_a)) {
		err = PTR_ERR(obj_a);
		goto out_file;
	}

	obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj_b)) {
		err = PTR_ERR(obj_b);
		goto put_a;
	}

	count = 0;
	num_engines = 0;
	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		unsigned long this = 0;

		if (!intel_engine_can_store_dword(engine))
			continue;

		/* Not all engines have their own GPR! */
		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
			continue;

		while (!__igt_timeout(end_time, NULL)) {
			u32 value = 0xc5c5c5c5;
			u64 offset;

			/* Leave enough space at offset 0 for the batch */
			offset = igt_random_offset(&prng,
						   I915_GTT_PAGE_SIZE, vm_total,
						   sizeof(u32), alignof_dword);

			err = write_to_scratch(ctx_a, engine, obj_a,
					       offset, 0xdeadbeef);
			if (err == 0)
				err = read_from_scratch(ctx_b, engine, obj_b,
							offset, &value);
			if (err)
				goto put_b;

			if (value != expected) {
				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
				       engine->name, value,
				       upper_32_bits(offset),
				       lower_32_bits(offset),
				       this);
				err = -EINVAL;
				goto put_b;
			}

			this++;
		}
		count += this;
		num_engines++;
	}
	pr_info("Checked %lu scratch offsets across %lu engines\n",
		count, num_engines);

put_b:
	i915_gem_object_put(obj_b);
put_a:
	i915_gem_object_put(obj_a);
out_file:
	if (igt_live_test_end(&t))
		err = -EIO;
	fput(file);
	return err;
}

int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_switch),
		SUBTEST(live_parallel_switch),
		SUBTEST(igt_ctx_exec),
		SUBTEST(igt_ctx_readonly),
		SUBTEST(igt_ctx_sseu),
		SUBTEST(igt_shared_ctx_exec),
		SUBTEST(igt_vm_isolation),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_live_subtests(tests, i915);
}