1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2017 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 9 #include "gem/i915_gem_pm.h" 10 #include "gt/intel_engine_pm.h" 11 #include "gt/intel_engine_regs.h" 12 #include "gt/intel_gt.h" 13 #include "gt/intel_gt_requests.h" 14 #include "gt/intel_reset.h" 15 #include "i915_selftest.h" 16 17 #include "gem/selftests/igt_gem_utils.h" 18 #include "selftests/i915_random.h" 19 #include "selftests/igt_flush_test.h" 20 #include "selftests/igt_live_test.h" 21 #include "selftests/igt_reset.h" 22 #include "selftests/igt_spinner.h" 23 #include "selftests/mock_drm.h" 24 #include "selftests/mock_gem_device.h" 25 26 #include "huge_gem_object.h" 27 #include "igt_gem_utils.h" 28 29 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) 30 31 static int live_nop_switch(void *arg) 32 { 33 const unsigned int nctx = 1024; 34 struct drm_i915_private *i915 = arg; 35 struct intel_engine_cs *engine; 36 struct i915_gem_context **ctx; 37 struct igt_live_test t; 38 struct file *file; 39 unsigned long n; 40 int err = -ENODEV; 41 42 /* 43 * Create as many contexts as we can feasibly get away with 44 * and check we can switch between them rapidly. 45 * 46 * Serves as very simple stress test for submission and HW switching 47 * between contexts. 48 */ 49 50 if (!DRIVER_CAPS(i915)->has_logical_contexts) 51 return 0; 52 53 file = mock_file(i915); 54 if (IS_ERR(file)) 55 return PTR_ERR(file); 56 57 ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); 58 if (!ctx) { 59 err = -ENOMEM; 60 goto out_file; 61 } 62 63 for (n = 0; n < nctx; n++) { 64 ctx[n] = live_context(i915, file); 65 if (IS_ERR(ctx[n])) { 66 err = PTR_ERR(ctx[n]); 67 goto out_file; 68 } 69 } 70 71 for_each_uabi_engine(engine, i915) { 72 struct i915_request *rq = NULL; 73 unsigned long end_time, prime; 74 ktime_t times[2] = {}; 75 76 times[0] = ktime_get_raw(); 77 for (n = 0; n < nctx; n++) { 78 struct i915_request *this; 79 80 this = igt_request_alloc(ctx[n], engine); 81 if (IS_ERR(this)) { 82 err = PTR_ERR(this); 83 goto out_file; 84 } 85 if (rq) { 86 i915_request_await_dma_fence(this, &rq->fence); 87 i915_request_put(rq); 88 } 89 rq = i915_request_get(this); 90 i915_request_add(this); 91 } 92 if (i915_request_wait(rq, 0, 10 * HZ) < 0) { 93 pr_err("Failed to populated %d contexts\n", nctx); 94 intel_gt_set_wedged(to_gt(i915)); 95 i915_request_put(rq); 96 err = -EIO; 97 goto out_file; 98 } 99 i915_request_put(rq); 100 101 times[1] = ktime_get_raw(); 102 103 pr_info("Populated %d contexts on %s in %lluns\n", 104 nctx, engine->name, ktime_to_ns(times[1] - times[0])); 105 106 err = igt_live_test_begin(&t, i915, __func__, engine->name); 107 if (err) 108 goto out_file; 109 110 end_time = jiffies + i915_selftest.timeout_jiffies; 111 for_each_prime_number_from(prime, 2, 8192) { 112 times[1] = ktime_get_raw(); 113 114 rq = NULL; 115 for (n = 0; n < prime; n++) { 116 struct i915_request *this; 117 118 this = igt_request_alloc(ctx[n % nctx], engine); 119 if (IS_ERR(this)) { 120 err = PTR_ERR(this); 121 goto out_file; 122 } 123 124 if (rq) { /* Force submission order */ 125 i915_request_await_dma_fence(this, &rq->fence); 126 i915_request_put(rq); 127 } 128 129 /* 130 * This space is left intentionally blank. 131 * 132 * We do not actually want to perform any 133 * action with this request, we just want 134 * to measure the latency in allocation 135 * and submission of our breadcrumbs - 136 * ensuring that the bare request is sufficient 137 * for the system to work (i.e. proper HEAD 138 * tracking of the rings, interrupt handling, 139 * etc). It also gives us the lowest bounds 140 * for latency. 141 */ 142 143 rq = i915_request_get(this); 144 i915_request_add(this); 145 } 146 GEM_BUG_ON(!rq); 147 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 148 pr_err("Switching between %ld contexts timed out\n", 149 prime); 150 intel_gt_set_wedged(to_gt(i915)); 151 i915_request_put(rq); 152 break; 153 } 154 i915_request_put(rq); 155 156 times[1] = ktime_sub(ktime_get_raw(), times[1]); 157 if (prime == 2) 158 times[0] = times[1]; 159 160 if (__igt_timeout(end_time, NULL)) 161 break; 162 } 163 164 err = igt_live_test_end(&t); 165 if (err) 166 goto out_file; 167 168 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", 169 engine->name, 170 ktime_to_ns(times[0]), 171 prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); 172 } 173 174 out_file: 175 fput(file); 176 return err; 177 } 178 179 struct parallel_switch { 180 struct task_struct *tsk; 181 struct intel_context *ce[2]; 182 }; 183 184 static int __live_parallel_switch1(void *data) 185 { 186 struct parallel_switch *arg = data; 187 IGT_TIMEOUT(end_time); 188 unsigned long count; 189 190 count = 0; 191 do { 192 struct i915_request *rq = NULL; 193 int err, n; 194 195 err = 0; 196 for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) { 197 struct i915_request *prev = rq; 198 199 rq = i915_request_create(arg->ce[n]); 200 if (IS_ERR(rq)) { 201 i915_request_put(prev); 202 return PTR_ERR(rq); 203 } 204 205 i915_request_get(rq); 206 if (prev) { 207 err = i915_request_await_dma_fence(rq, &prev->fence); 208 i915_request_put(prev); 209 } 210 211 i915_request_add(rq); 212 } 213 if (i915_request_wait(rq, 0, HZ / 5) < 0) 214 err = -ETIME; 215 i915_request_put(rq); 216 if (err) 217 return err; 218 219 count++; 220 } while (!__igt_timeout(end_time, NULL)); 221 222 pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count); 223 return 0; 224 } 225 226 static int __live_parallel_switchN(void *data) 227 { 228 struct parallel_switch *arg = data; 229 struct i915_request *rq = NULL; 230 IGT_TIMEOUT(end_time); 231 unsigned long count; 232 int n; 233 234 count = 0; 235 do { 236 for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { 237 struct i915_request *prev = rq; 238 int err = 0; 239 240 rq = i915_request_create(arg->ce[n]); 241 if (IS_ERR(rq)) { 242 i915_request_put(prev); 243 return PTR_ERR(rq); 244 } 245 246 i915_request_get(rq); 247 if (prev) { 248 err = i915_request_await_dma_fence(rq, &prev->fence); 249 i915_request_put(prev); 250 } 251 252 i915_request_add(rq); 253 if (err) { 254 i915_request_put(rq); 255 return err; 256 } 257 } 258 259 count++; 260 } while (!__igt_timeout(end_time, NULL)); 261 i915_request_put(rq); 262 263 pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); 264 return 0; 265 } 266 267 static int live_parallel_switch(void *arg) 268 { 269 struct drm_i915_private *i915 = arg; 270 static int (* const func[])(void *arg) = { 271 __live_parallel_switch1, 272 __live_parallel_switchN, 273 NULL, 274 }; 275 struct parallel_switch *data = NULL; 276 struct i915_gem_engines *engines; 277 struct i915_gem_engines_iter it; 278 int (* const *fn)(void *arg); 279 struct i915_gem_context *ctx; 280 struct intel_context *ce; 281 struct file *file; 282 int n, m, count; 283 int err = 0; 284 285 /* 286 * Check we can process switches on all engines simultaneously. 287 */ 288 289 if (!DRIVER_CAPS(i915)->has_logical_contexts) 290 return 0; 291 292 file = mock_file(i915); 293 if (IS_ERR(file)) 294 return PTR_ERR(file); 295 296 ctx = live_context(i915, file); 297 if (IS_ERR(ctx)) { 298 err = PTR_ERR(ctx); 299 goto out_file; 300 } 301 302 engines = i915_gem_context_lock_engines(ctx); 303 count = engines->num_engines; 304 305 data = kcalloc(count, sizeof(*data), GFP_KERNEL); 306 if (!data) { 307 i915_gem_context_unlock_engines(ctx); 308 err = -ENOMEM; 309 goto out_file; 310 } 311 312 m = 0; /* Use the first context as our template for the engines */ 313 for_each_gem_engine(ce, engines, it) { 314 err = intel_context_pin(ce); 315 if (err) { 316 i915_gem_context_unlock_engines(ctx); 317 goto out; 318 } 319 data[m++].ce[0] = intel_context_get(ce); 320 } 321 i915_gem_context_unlock_engines(ctx); 322 323 /* Clone the same set of engines into the other contexts */ 324 for (n = 1; n < ARRAY_SIZE(data->ce); n++) { 325 ctx = live_context(i915, file); 326 if (IS_ERR(ctx)) { 327 err = PTR_ERR(ctx); 328 goto out; 329 } 330 331 for (m = 0; m < count; m++) { 332 if (!data[m].ce[0]) 333 continue; 334 335 ce = intel_context_create(data[m].ce[0]->engine); 336 if (IS_ERR(ce)) 337 goto out; 338 339 err = intel_context_pin(ce); 340 if (err) { 341 intel_context_put(ce); 342 goto out; 343 } 344 345 data[m].ce[n] = ce; 346 } 347 } 348 349 for (fn = func; !err && *fn; fn++) { 350 struct igt_live_test t; 351 int n; 352 353 err = igt_live_test_begin(&t, i915, __func__, ""); 354 if (err) 355 break; 356 357 for (n = 0; n < count; n++) { 358 if (!data[n].ce[0]) 359 continue; 360 361 data[n].tsk = kthread_run(*fn, &data[n], 362 "igt/parallel:%s", 363 data[n].ce[0]->engine->name); 364 if (IS_ERR(data[n].tsk)) { 365 err = PTR_ERR(data[n].tsk); 366 break; 367 } 368 get_task_struct(data[n].tsk); 369 } 370 371 yield(); /* start all threads before we kthread_stop() */ 372 373 for (n = 0; n < count; n++) { 374 int status; 375 376 if (IS_ERR_OR_NULL(data[n].tsk)) 377 continue; 378 379 status = kthread_stop(data[n].tsk); 380 if (status && !err) 381 err = status; 382 383 put_task_struct(data[n].tsk); 384 data[n].tsk = NULL; 385 } 386 387 if (igt_live_test_end(&t)) 388 err = -EIO; 389 } 390 391 out: 392 for (n = 0; n < count; n++) { 393 for (m = 0; m < ARRAY_SIZE(data->ce); m++) { 394 if (!data[n].ce[m]) 395 continue; 396 397 intel_context_unpin(data[n].ce[m]); 398 intel_context_put(data[n].ce[m]); 399 } 400 } 401 kfree(data); 402 out_file: 403 fput(file); 404 return err; 405 } 406 407 static unsigned long real_page_count(struct drm_i915_gem_object *obj) 408 { 409 return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; 410 } 411 412 static unsigned long fake_page_count(struct drm_i915_gem_object *obj) 413 { 414 return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; 415 } 416 417 static int gpu_fill(struct intel_context *ce, 418 struct drm_i915_gem_object *obj, 419 unsigned int dw) 420 { 421 struct i915_vma *vma; 422 int err; 423 424 GEM_BUG_ON(obj->base.size > ce->vm->total); 425 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 426 427 vma = i915_vma_instance(obj, ce->vm, NULL); 428 if (IS_ERR(vma)) 429 return PTR_ERR(vma); 430 431 err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); 432 if (err) 433 return err; 434 435 /* 436 * Within the GTT the huge objects maps every page onto 437 * its 1024 real pages (using phys_pfn = dma_pfn % 1024). 438 * We set the nth dword within the page using the nth 439 * mapping via the GTT - this should exercise the GTT mapping 440 * whilst checking that each context provides a unique view 441 * into the object. 442 */ 443 err = igt_gpu_fill_dw(ce, vma, 444 (dw * real_page_count(obj)) << PAGE_SHIFT | 445 (dw * sizeof(u32)), 446 real_page_count(obj), 447 dw); 448 i915_vma_unpin(vma); 449 450 return err; 451 } 452 453 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) 454 { 455 const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); 456 unsigned int n, m, need_flush; 457 int err; 458 459 i915_gem_object_lock(obj, NULL); 460 err = i915_gem_object_prepare_write(obj, &need_flush); 461 if (err) 462 goto out; 463 464 for (n = 0; n < real_page_count(obj); n++) { 465 u32 *map; 466 467 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 468 for (m = 0; m < DW_PER_PAGE; m++) 469 map[m] = value; 470 if (!has_llc) 471 drm_clflush_virt_range(map, PAGE_SIZE); 472 kunmap_atomic(map); 473 } 474 475 i915_gem_object_finish_access(obj); 476 obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; 477 obj->write_domain = 0; 478 out: 479 i915_gem_object_unlock(obj); 480 return err; 481 } 482 483 static noinline int cpu_check(struct drm_i915_gem_object *obj, 484 unsigned int idx, unsigned int max) 485 { 486 unsigned int n, m, needs_flush; 487 int err; 488 489 i915_gem_object_lock(obj, NULL); 490 err = i915_gem_object_prepare_read(obj, &needs_flush); 491 if (err) 492 goto out_unlock; 493 494 for (n = 0; n < real_page_count(obj); n++) { 495 u32 *map; 496 497 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 498 if (needs_flush & CLFLUSH_BEFORE) 499 drm_clflush_virt_range(map, PAGE_SIZE); 500 501 for (m = 0; m < max; m++) { 502 if (map[m] != m) { 503 pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n", 504 __builtin_return_address(0), idx, 505 n, real_page_count(obj), m, max, 506 map[m], m); 507 err = -EINVAL; 508 goto out_unmap; 509 } 510 } 511 512 for (; m < DW_PER_PAGE; m++) { 513 if (map[m] != STACK_MAGIC) { 514 pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n", 515 __builtin_return_address(0), idx, n, m, 516 map[m], STACK_MAGIC); 517 err = -EINVAL; 518 goto out_unmap; 519 } 520 } 521 522 out_unmap: 523 kunmap_atomic(map); 524 if (err) 525 break; 526 } 527 528 i915_gem_object_finish_access(obj); 529 out_unlock: 530 i915_gem_object_unlock(obj); 531 return err; 532 } 533 534 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj) 535 { 536 int err; 537 538 GEM_BUG_ON(obj->base.handle_count); 539 540 /* tie the object to the drm_file for easy reaping */ 541 err = idr_alloc(&to_drm_file(file)->object_idr, 542 &obj->base, 1, 0, GFP_KERNEL); 543 if (err < 0) 544 return err; 545 546 i915_gem_object_get(obj); 547 obj->base.handle_count++; 548 return 0; 549 } 550 551 static struct drm_i915_gem_object * 552 create_test_object(struct i915_address_space *vm, 553 struct file *file, 554 struct list_head *objects) 555 { 556 struct drm_i915_gem_object *obj; 557 u64 size; 558 int err; 559 560 /* Keep in GEM's good graces */ 561 intel_gt_retire_requests(vm->gt); 562 563 size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); 564 size = round_down(size, DW_PER_PAGE * PAGE_SIZE); 565 566 obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); 567 if (IS_ERR(obj)) 568 return obj; 569 570 err = file_add_object(file, obj); 571 i915_gem_object_put(obj); 572 if (err) 573 return ERR_PTR(err); 574 575 err = cpu_fill(obj, STACK_MAGIC); 576 if (err) { 577 pr_err("Failed to fill object with cpu, err=%d\n", 578 err); 579 return ERR_PTR(err); 580 } 581 582 list_add_tail(&obj->st_link, objects); 583 return obj; 584 } 585 586 static unsigned long max_dwords(struct drm_i915_gem_object *obj) 587 { 588 unsigned long npages = fake_page_count(obj); 589 590 GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); 591 return npages / DW_PER_PAGE; 592 } 593 594 static void throttle_release(struct i915_request **q, int count) 595 { 596 int i; 597 598 for (i = 0; i < count; i++) { 599 if (IS_ERR_OR_NULL(q[i])) 600 continue; 601 602 i915_request_put(fetch_and_zero(&q[i])); 603 } 604 } 605 606 static int throttle(struct intel_context *ce, 607 struct i915_request **q, int count) 608 { 609 int i; 610 611 if (!IS_ERR_OR_NULL(q[0])) { 612 if (i915_request_wait(q[0], 613 I915_WAIT_INTERRUPTIBLE, 614 MAX_SCHEDULE_TIMEOUT) < 0) 615 return -EINTR; 616 617 i915_request_put(q[0]); 618 } 619 620 for (i = 0; i < count - 1; i++) 621 q[i] = q[i + 1]; 622 623 q[i] = intel_context_create_request(ce); 624 if (IS_ERR(q[i])) 625 return PTR_ERR(q[i]); 626 627 i915_request_get(q[i]); 628 i915_request_add(q[i]); 629 630 return 0; 631 } 632 633 static int igt_ctx_exec(void *arg) 634 { 635 struct drm_i915_private *i915 = arg; 636 struct intel_engine_cs *engine; 637 int err = -ENODEV; 638 639 /* 640 * Create a few different contexts (with different mm) and write 641 * through each ctx/mm using the GPU making sure those writes end 642 * up in the expected pages of our obj. 643 */ 644 645 if (!DRIVER_CAPS(i915)->has_logical_contexts) 646 return 0; 647 648 for_each_uabi_engine(engine, i915) { 649 struct drm_i915_gem_object *obj = NULL; 650 unsigned long ncontexts, ndwords, dw; 651 struct i915_request *tq[5] = {}; 652 struct igt_live_test t; 653 IGT_TIMEOUT(end_time); 654 LIST_HEAD(objects); 655 struct file *file; 656 657 if (!intel_engine_can_store_dword(engine)) 658 continue; 659 660 if (!engine->context_size) 661 continue; /* No logical context support in HW */ 662 663 file = mock_file(i915); 664 if (IS_ERR(file)) 665 return PTR_ERR(file); 666 667 err = igt_live_test_begin(&t, i915, __func__, engine->name); 668 if (err) 669 goto out_file; 670 671 ncontexts = 0; 672 ndwords = 0; 673 dw = 0; 674 while (!time_after(jiffies, end_time)) { 675 struct i915_gem_context *ctx; 676 struct intel_context *ce; 677 678 ctx = kernel_context(i915, NULL); 679 if (IS_ERR(ctx)) { 680 err = PTR_ERR(ctx); 681 goto out_file; 682 } 683 684 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 685 GEM_BUG_ON(IS_ERR(ce)); 686 687 if (!obj) { 688 obj = create_test_object(ce->vm, file, &objects); 689 if (IS_ERR(obj)) { 690 err = PTR_ERR(obj); 691 intel_context_put(ce); 692 kernel_context_close(ctx); 693 goto out_file; 694 } 695 } 696 697 err = gpu_fill(ce, obj, dw); 698 if (err) { 699 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 700 ndwords, dw, max_dwords(obj), 701 engine->name, 702 yesno(i915_gem_context_has_full_ppgtt(ctx)), 703 err); 704 intel_context_put(ce); 705 kernel_context_close(ctx); 706 goto out_file; 707 } 708 709 err = throttle(ce, tq, ARRAY_SIZE(tq)); 710 if (err) { 711 intel_context_put(ce); 712 kernel_context_close(ctx); 713 goto out_file; 714 } 715 716 if (++dw == max_dwords(obj)) { 717 obj = NULL; 718 dw = 0; 719 } 720 721 ndwords++; 722 ncontexts++; 723 724 intel_context_put(ce); 725 kernel_context_close(ctx); 726 } 727 728 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 729 ncontexts, engine->name, ndwords); 730 731 ncontexts = dw = 0; 732 list_for_each_entry(obj, &objects, st_link) { 733 unsigned int rem = 734 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 735 736 err = cpu_check(obj, ncontexts++, rem); 737 if (err) 738 break; 739 740 dw += rem; 741 } 742 743 out_file: 744 throttle_release(tq, ARRAY_SIZE(tq)); 745 if (igt_live_test_end(&t)) 746 err = -EIO; 747 748 fput(file); 749 if (err) 750 return err; 751 752 i915_gem_drain_freed_objects(i915); 753 } 754 755 return 0; 756 } 757 758 static int igt_shared_ctx_exec(void *arg) 759 { 760 struct drm_i915_private *i915 = arg; 761 struct i915_request *tq[5] = {}; 762 struct i915_gem_context *parent; 763 struct intel_engine_cs *engine; 764 struct igt_live_test t; 765 struct file *file; 766 int err = 0; 767 768 /* 769 * Create a few different contexts with the same mm and write 770 * through each ctx using the GPU making sure those writes end 771 * up in the expected pages of our obj. 772 */ 773 if (!DRIVER_CAPS(i915)->has_logical_contexts) 774 return 0; 775 776 file = mock_file(i915); 777 if (IS_ERR(file)) 778 return PTR_ERR(file); 779 780 parent = live_context(i915, file); 781 if (IS_ERR(parent)) { 782 err = PTR_ERR(parent); 783 goto out_file; 784 } 785 786 if (!parent->vm) { /* not full-ppgtt; nothing to share */ 787 err = 0; 788 goto out_file; 789 } 790 791 err = igt_live_test_begin(&t, i915, __func__, ""); 792 if (err) 793 goto out_file; 794 795 for_each_uabi_engine(engine, i915) { 796 unsigned long ncontexts, ndwords, dw; 797 struct drm_i915_gem_object *obj = NULL; 798 IGT_TIMEOUT(end_time); 799 LIST_HEAD(objects); 800 801 if (!intel_engine_can_store_dword(engine)) 802 continue; 803 804 dw = 0; 805 ndwords = 0; 806 ncontexts = 0; 807 while (!time_after(jiffies, end_time)) { 808 struct i915_gem_context *ctx; 809 struct intel_context *ce; 810 811 ctx = kernel_context(i915, parent->vm); 812 if (IS_ERR(ctx)) { 813 err = PTR_ERR(ctx); 814 goto out_test; 815 } 816 817 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 818 GEM_BUG_ON(IS_ERR(ce)); 819 820 if (!obj) { 821 obj = create_test_object(parent->vm, 822 file, &objects); 823 if (IS_ERR(obj)) { 824 err = PTR_ERR(obj); 825 intel_context_put(ce); 826 kernel_context_close(ctx); 827 goto out_test; 828 } 829 } 830 831 err = gpu_fill(ce, obj, dw); 832 if (err) { 833 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 834 ndwords, dw, max_dwords(obj), 835 engine->name, 836 yesno(i915_gem_context_has_full_ppgtt(ctx)), 837 err); 838 intel_context_put(ce); 839 kernel_context_close(ctx); 840 goto out_test; 841 } 842 843 err = throttle(ce, tq, ARRAY_SIZE(tq)); 844 if (err) { 845 intel_context_put(ce); 846 kernel_context_close(ctx); 847 goto out_test; 848 } 849 850 if (++dw == max_dwords(obj)) { 851 obj = NULL; 852 dw = 0; 853 } 854 855 ndwords++; 856 ncontexts++; 857 858 intel_context_put(ce); 859 kernel_context_close(ctx); 860 } 861 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 862 ncontexts, engine->name, ndwords); 863 864 ncontexts = dw = 0; 865 list_for_each_entry(obj, &objects, st_link) { 866 unsigned int rem = 867 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 868 869 err = cpu_check(obj, ncontexts++, rem); 870 if (err) 871 goto out_test; 872 873 dw += rem; 874 } 875 876 i915_gem_drain_freed_objects(i915); 877 } 878 out_test: 879 throttle_release(tq, ARRAY_SIZE(tq)); 880 if (igt_live_test_end(&t)) 881 err = -EIO; 882 out_file: 883 fput(file); 884 return err; 885 } 886 887 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma) 888 { 889 u32 *cmd; 890 891 GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8); 892 893 cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB); 894 if (IS_ERR(cmd)) 895 return PTR_ERR(cmd); 896 897 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 898 *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE)); 899 *cmd++ = lower_32_bits(vma->node.start); 900 *cmd++ = upper_32_bits(vma->node.start); 901 *cmd = MI_BATCH_BUFFER_END; 902 903 __i915_gem_object_flush_map(rpcs, 0, 64); 904 i915_gem_object_unpin_map(rpcs); 905 906 intel_gt_chipset_flush(vma->vm->gt); 907 908 return 0; 909 } 910 911 static int 912 emit_rpcs_query(struct drm_i915_gem_object *obj, 913 struct intel_context *ce, 914 struct i915_request **rq_out) 915 { 916 struct drm_i915_private *i915 = to_i915(obj->base.dev); 917 struct i915_request *rq; 918 struct i915_gem_ww_ctx ww; 919 struct i915_vma *batch; 920 struct i915_vma *vma; 921 struct drm_i915_gem_object *rpcs; 922 int err; 923 924 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 925 926 if (GRAPHICS_VER(i915) < 8) 927 return -EINVAL; 928 929 vma = i915_vma_instance(obj, ce->vm, NULL); 930 if (IS_ERR(vma)) 931 return PTR_ERR(vma); 932 933 rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE); 934 if (IS_ERR(rpcs)) 935 return PTR_ERR(rpcs); 936 937 batch = i915_vma_instance(rpcs, ce->vm, NULL); 938 if (IS_ERR(batch)) { 939 err = PTR_ERR(batch); 940 goto err_put; 941 } 942 943 i915_gem_ww_ctx_init(&ww, false); 944 retry: 945 err = i915_gem_object_lock(obj, &ww); 946 if (!err) 947 err = i915_gem_object_lock(rpcs, &ww); 948 if (!err) 949 err = i915_gem_object_set_to_gtt_domain(obj, false); 950 if (!err) 951 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); 952 if (err) 953 goto err_put; 954 955 err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER); 956 if (err) 957 goto err_vma; 958 959 err = rpcs_query_batch(rpcs, vma); 960 if (err) 961 goto err_batch; 962 963 rq = i915_request_create(ce); 964 if (IS_ERR(rq)) { 965 err = PTR_ERR(rq); 966 goto err_batch; 967 } 968 969 err = i915_request_await_object(rq, batch->obj, false); 970 if (err == 0) 971 err = i915_vma_move_to_active(batch, rq, 0); 972 if (err) 973 goto skip_request; 974 975 err = i915_request_await_object(rq, vma->obj, true); 976 if (err == 0) 977 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 978 if (err) 979 goto skip_request; 980 981 if (rq->engine->emit_init_breadcrumb) { 982 err = rq->engine->emit_init_breadcrumb(rq); 983 if (err) 984 goto skip_request; 985 } 986 987 err = rq->engine->emit_bb_start(rq, 988 batch->node.start, batch->node.size, 989 0); 990 if (err) 991 goto skip_request; 992 993 *rq_out = i915_request_get(rq); 994 995 skip_request: 996 if (err) 997 i915_request_set_error_once(rq, err); 998 i915_request_add(rq); 999 err_batch: 1000 i915_vma_unpin(batch); 1001 err_vma: 1002 i915_vma_unpin(vma); 1003 err_put: 1004 if (err == -EDEADLK) { 1005 err = i915_gem_ww_ctx_backoff(&ww); 1006 if (!err) 1007 goto retry; 1008 } 1009 i915_gem_ww_ctx_fini(&ww); 1010 i915_gem_object_put(rpcs); 1011 return err; 1012 } 1013 1014 #define TEST_IDLE BIT(0) 1015 #define TEST_BUSY BIT(1) 1016 #define TEST_RESET BIT(2) 1017 1018 static int 1019 __sseu_prepare(const char *name, 1020 unsigned int flags, 1021 struct intel_context *ce, 1022 struct igt_spinner **spin) 1023 { 1024 struct i915_request *rq; 1025 int ret; 1026 1027 *spin = NULL; 1028 if (!(flags & (TEST_BUSY | TEST_RESET))) 1029 return 0; 1030 1031 *spin = kzalloc(sizeof(**spin), GFP_KERNEL); 1032 if (!*spin) 1033 return -ENOMEM; 1034 1035 ret = igt_spinner_init(*spin, ce->engine->gt); 1036 if (ret) 1037 goto err_free; 1038 1039 rq = igt_spinner_create_request(*spin, ce, MI_NOOP); 1040 if (IS_ERR(rq)) { 1041 ret = PTR_ERR(rq); 1042 goto err_fini; 1043 } 1044 1045 i915_request_add(rq); 1046 1047 if (!igt_wait_for_spinner(*spin, rq)) { 1048 pr_err("%s: Spinner failed to start!\n", name); 1049 ret = -ETIMEDOUT; 1050 goto err_end; 1051 } 1052 1053 return 0; 1054 1055 err_end: 1056 igt_spinner_end(*spin); 1057 err_fini: 1058 igt_spinner_fini(*spin); 1059 err_free: 1060 kfree(fetch_and_zero(spin)); 1061 return ret; 1062 } 1063 1064 static int 1065 __read_slice_count(struct intel_context *ce, 1066 struct drm_i915_gem_object *obj, 1067 struct igt_spinner *spin, 1068 u32 *rpcs) 1069 { 1070 struct i915_request *rq = NULL; 1071 u32 s_mask, s_shift; 1072 unsigned int cnt; 1073 u32 *buf, val; 1074 long ret; 1075 1076 ret = emit_rpcs_query(obj, ce, &rq); 1077 if (ret) 1078 return ret; 1079 1080 if (spin) 1081 igt_spinner_end(spin); 1082 1083 ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); 1084 i915_request_put(rq); 1085 if (ret < 0) 1086 return ret; 1087 1088 buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1089 if (IS_ERR(buf)) { 1090 ret = PTR_ERR(buf); 1091 return ret; 1092 } 1093 1094 if (GRAPHICS_VER(ce->engine->i915) >= 11) { 1095 s_mask = GEN11_RPCS_S_CNT_MASK; 1096 s_shift = GEN11_RPCS_S_CNT_SHIFT; 1097 } else { 1098 s_mask = GEN8_RPCS_S_CNT_MASK; 1099 s_shift = GEN8_RPCS_S_CNT_SHIFT; 1100 } 1101 1102 val = *buf; 1103 cnt = (val & s_mask) >> s_shift; 1104 *rpcs = val; 1105 1106 i915_gem_object_unpin_map(obj); 1107 1108 return cnt; 1109 } 1110 1111 static int 1112 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, 1113 const char *prefix, const char *suffix) 1114 { 1115 if (slices == expected) 1116 return 0; 1117 1118 if (slices < 0) { 1119 pr_err("%s: %s read slice count failed with %d%s\n", 1120 name, prefix, slices, suffix); 1121 return slices; 1122 } 1123 1124 pr_err("%s: %s slice count %d is not %u%s\n", 1125 name, prefix, slices, expected, suffix); 1126 1127 pr_info("RPCS=0x%x; %u%sx%u%s\n", 1128 rpcs, slices, 1129 (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", 1130 (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, 1131 (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : ""); 1132 1133 return -EINVAL; 1134 } 1135 1136 static int 1137 __sseu_finish(const char *name, 1138 unsigned int flags, 1139 struct intel_context *ce, 1140 struct drm_i915_gem_object *obj, 1141 unsigned int expected, 1142 struct igt_spinner *spin) 1143 { 1144 unsigned int slices = hweight32(ce->engine->sseu.slice_mask); 1145 u32 rpcs = 0; 1146 int ret = 0; 1147 1148 if (flags & TEST_RESET) { 1149 ret = intel_engine_reset(ce->engine, "sseu"); 1150 if (ret) 1151 goto out; 1152 } 1153 1154 ret = __read_slice_count(ce, obj, 1155 flags & TEST_RESET ? NULL : spin, &rpcs); 1156 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); 1157 if (ret) 1158 goto out; 1159 1160 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); 1161 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); 1162 1163 out: 1164 if (spin) 1165 igt_spinner_end(spin); 1166 1167 if ((flags & TEST_IDLE) && ret == 0) { 1168 ret = igt_flush_test(ce->engine->i915); 1169 if (ret) 1170 return ret; 1171 1172 ret = __read_slice_count(ce, obj, NULL, &rpcs); 1173 ret = __check_rpcs(name, rpcs, ret, expected, 1174 "Context", " after idle!"); 1175 } 1176 1177 return ret; 1178 } 1179 1180 static int 1181 __sseu_test(const char *name, 1182 unsigned int flags, 1183 struct intel_context *ce, 1184 struct drm_i915_gem_object *obj, 1185 struct intel_sseu sseu) 1186 { 1187 struct igt_spinner *spin = NULL; 1188 int ret; 1189 1190 intel_engine_pm_get(ce->engine); 1191 1192 ret = __sseu_prepare(name, flags, ce, &spin); 1193 if (ret) 1194 goto out_pm; 1195 1196 ret = intel_context_reconfigure_sseu(ce, sseu); 1197 if (ret) 1198 goto out_spin; 1199 1200 ret = __sseu_finish(name, flags, ce, obj, 1201 hweight32(sseu.slice_mask), spin); 1202 1203 out_spin: 1204 if (spin) { 1205 igt_spinner_end(spin); 1206 igt_spinner_fini(spin); 1207 kfree(spin); 1208 } 1209 out_pm: 1210 intel_engine_pm_put(ce->engine); 1211 return ret; 1212 } 1213 1214 static int 1215 __igt_ctx_sseu(struct drm_i915_private *i915, 1216 const char *name, 1217 unsigned int flags) 1218 { 1219 struct drm_i915_gem_object *obj; 1220 int inst = 0; 1221 int ret = 0; 1222 1223 if (GRAPHICS_VER(i915) < 9) 1224 return 0; 1225 1226 if (flags & TEST_RESET) 1227 igt_global_reset_lock(to_gt(i915)); 1228 1229 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 1230 if (IS_ERR(obj)) { 1231 ret = PTR_ERR(obj); 1232 goto out_unlock; 1233 } 1234 1235 do { 1236 struct intel_engine_cs *engine; 1237 struct intel_context *ce; 1238 struct intel_sseu pg_sseu; 1239 1240 engine = intel_engine_lookup_user(i915, 1241 I915_ENGINE_CLASS_RENDER, 1242 inst++); 1243 if (!engine) 1244 break; 1245 1246 if (hweight32(engine->sseu.slice_mask) < 2) 1247 continue; 1248 1249 if (!engine->gt->info.sseu.has_slice_pg) 1250 continue; 1251 1252 /* 1253 * Gen11 VME friendly power-gated configuration with 1254 * half enabled sub-slices. 1255 */ 1256 pg_sseu = engine->sseu; 1257 pg_sseu.slice_mask = 1; 1258 pg_sseu.subslice_mask = 1259 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); 1260 1261 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", 1262 engine->name, name, flags, 1263 hweight32(engine->sseu.slice_mask), 1264 hweight32(pg_sseu.slice_mask)); 1265 1266 ce = intel_context_create(engine); 1267 if (IS_ERR(ce)) { 1268 ret = PTR_ERR(ce); 1269 goto out_put; 1270 } 1271 1272 ret = intel_context_pin(ce); 1273 if (ret) 1274 goto out_ce; 1275 1276 /* First set the default mask. */ 1277 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1278 if (ret) 1279 goto out_unpin; 1280 1281 /* Then set a power-gated configuration. */ 1282 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1283 if (ret) 1284 goto out_unpin; 1285 1286 /* Back to defaults. */ 1287 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1288 if (ret) 1289 goto out_unpin; 1290 1291 /* One last power-gated configuration for the road. */ 1292 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1293 if (ret) 1294 goto out_unpin; 1295 1296 out_unpin: 1297 intel_context_unpin(ce); 1298 out_ce: 1299 intel_context_put(ce); 1300 } while (!ret); 1301 1302 if (igt_flush_test(i915)) 1303 ret = -EIO; 1304 1305 out_put: 1306 i915_gem_object_put(obj); 1307 1308 out_unlock: 1309 if (flags & TEST_RESET) 1310 igt_global_reset_unlock(to_gt(i915)); 1311 1312 if (ret) 1313 pr_err("%s: Failed with %d!\n", name, ret); 1314 1315 return ret; 1316 } 1317 1318 static int igt_ctx_sseu(void *arg) 1319 { 1320 struct { 1321 const char *name; 1322 unsigned int flags; 1323 } *phase, phases[] = { 1324 { .name = "basic", .flags = 0 }, 1325 { .name = "idle", .flags = TEST_IDLE }, 1326 { .name = "busy", .flags = TEST_BUSY }, 1327 { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, 1328 { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, 1329 { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, 1330 }; 1331 unsigned int i; 1332 int ret = 0; 1333 1334 for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); 1335 i++, phase++) 1336 ret = __igt_ctx_sseu(arg, phase->name, phase->flags); 1337 1338 return ret; 1339 } 1340 1341 static int igt_ctx_readonly(void *arg) 1342 { 1343 struct drm_i915_private *i915 = arg; 1344 unsigned long idx, ndwords, dw, num_engines; 1345 struct drm_i915_gem_object *obj = NULL; 1346 struct i915_request *tq[5] = {}; 1347 struct i915_gem_engines_iter it; 1348 struct i915_address_space *vm; 1349 struct i915_gem_context *ctx; 1350 struct intel_context *ce; 1351 struct igt_live_test t; 1352 I915_RND_STATE(prng); 1353 IGT_TIMEOUT(end_time); 1354 LIST_HEAD(objects); 1355 struct file *file; 1356 int err = -ENODEV; 1357 1358 /* 1359 * Create a few read-only objects (with the occasional writable object) 1360 * and try to write into these object checking that the GPU discards 1361 * any write to a read-only object. 1362 */ 1363 1364 file = mock_file(i915); 1365 if (IS_ERR(file)) 1366 return PTR_ERR(file); 1367 1368 err = igt_live_test_begin(&t, i915, __func__, ""); 1369 if (err) 1370 goto out_file; 1371 1372 ctx = live_context(i915, file); 1373 if (IS_ERR(ctx)) { 1374 err = PTR_ERR(ctx); 1375 goto out_file; 1376 } 1377 1378 vm = ctx->vm ?: &i915->ggtt.alias->vm; 1379 if (!vm || !vm->has_read_only) { 1380 err = 0; 1381 goto out_file; 1382 } 1383 1384 num_engines = 0; 1385 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) 1386 if (intel_engine_can_store_dword(ce->engine)) 1387 num_engines++; 1388 i915_gem_context_unlock_engines(ctx); 1389 1390 ndwords = 0; 1391 dw = 0; 1392 while (!time_after(jiffies, end_time)) { 1393 for_each_gem_engine(ce, 1394 i915_gem_context_lock_engines(ctx), it) { 1395 if (!intel_engine_can_store_dword(ce->engine)) 1396 continue; 1397 1398 if (!obj) { 1399 obj = create_test_object(ce->vm, file, &objects); 1400 if (IS_ERR(obj)) { 1401 err = PTR_ERR(obj); 1402 i915_gem_context_unlock_engines(ctx); 1403 goto out_file; 1404 } 1405 1406 if (prandom_u32_state(&prng) & 1) 1407 i915_gem_object_set_readonly(obj); 1408 } 1409 1410 err = gpu_fill(ce, obj, dw); 1411 if (err) { 1412 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 1413 ndwords, dw, max_dwords(obj), 1414 ce->engine->name, 1415 yesno(i915_gem_context_has_full_ppgtt(ctx)), 1416 err); 1417 i915_gem_context_unlock_engines(ctx); 1418 goto out_file; 1419 } 1420 1421 err = throttle(ce, tq, ARRAY_SIZE(tq)); 1422 if (err) { 1423 i915_gem_context_unlock_engines(ctx); 1424 goto out_file; 1425 } 1426 1427 if (++dw == max_dwords(obj)) { 1428 obj = NULL; 1429 dw = 0; 1430 } 1431 ndwords++; 1432 } 1433 i915_gem_context_unlock_engines(ctx); 1434 } 1435 pr_info("Submitted %lu dwords (across %lu engines)\n", 1436 ndwords, num_engines); 1437 1438 dw = 0; 1439 idx = 0; 1440 list_for_each_entry(obj, &objects, st_link) { 1441 unsigned int rem = 1442 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 1443 unsigned int num_writes; 1444 1445 num_writes = rem; 1446 if (i915_gem_object_is_readonly(obj)) 1447 num_writes = 0; 1448 1449 err = cpu_check(obj, idx++, num_writes); 1450 if (err) 1451 break; 1452 1453 dw += rem; 1454 } 1455 1456 out_file: 1457 throttle_release(tq, ARRAY_SIZE(tq)); 1458 if (igt_live_test_end(&t)) 1459 err = -EIO; 1460 1461 fput(file); 1462 return err; 1463 } 1464 1465 static int check_scratch(struct i915_address_space *vm, u64 offset) 1466 { 1467 struct drm_mm_node *node; 1468 1469 mutex_lock(&vm->mutex); 1470 node = __drm_mm_interval_first(&vm->mm, 1471 offset, offset + sizeof(u32) - 1); 1472 mutex_unlock(&vm->mutex); 1473 if (!node || node->start > offset) 1474 return 0; 1475 1476 GEM_BUG_ON(offset >= node->start + node->size); 1477 1478 pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n", 1479 upper_32_bits(offset), lower_32_bits(offset)); 1480 return -EINVAL; 1481 } 1482 1483 static int write_to_scratch(struct i915_gem_context *ctx, 1484 struct intel_engine_cs *engine, 1485 struct drm_i915_gem_object *obj, 1486 u64 offset, u32 value) 1487 { 1488 struct drm_i915_private *i915 = ctx->i915; 1489 struct i915_address_space *vm; 1490 struct i915_request *rq; 1491 struct i915_vma *vma; 1492 u32 *cmd; 1493 int err; 1494 1495 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1496 1497 err = check_scratch(ctx->vm, offset); 1498 if (err) 1499 return err; 1500 1501 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1502 if (IS_ERR(cmd)) 1503 return PTR_ERR(cmd); 1504 1505 *cmd++ = MI_STORE_DWORD_IMM_GEN4; 1506 if (GRAPHICS_VER(i915) >= 8) { 1507 *cmd++ = lower_32_bits(offset); 1508 *cmd++ = upper_32_bits(offset); 1509 } else { 1510 *cmd++ = 0; 1511 *cmd++ = offset; 1512 } 1513 *cmd++ = value; 1514 *cmd = MI_BATCH_BUFFER_END; 1515 __i915_gem_object_flush_map(obj, 0, 64); 1516 i915_gem_object_unpin_map(obj); 1517 1518 intel_gt_chipset_flush(engine->gt); 1519 1520 vm = i915_gem_context_get_eb_vm(ctx); 1521 vma = i915_vma_instance(obj, vm, NULL); 1522 if (IS_ERR(vma)) { 1523 err = PTR_ERR(vma); 1524 goto out_vm; 1525 } 1526 1527 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1528 if (err) 1529 goto out_vm; 1530 1531 rq = igt_request_alloc(ctx, engine); 1532 if (IS_ERR(rq)) { 1533 err = PTR_ERR(rq); 1534 goto err_unpin; 1535 } 1536 1537 i915_vma_lock(vma); 1538 err = i915_request_await_object(rq, vma->obj, false); 1539 if (err == 0) 1540 err = i915_vma_move_to_active(vma, rq, 0); 1541 i915_vma_unlock(vma); 1542 if (err) 1543 goto skip_request; 1544 1545 if (rq->engine->emit_init_breadcrumb) { 1546 err = rq->engine->emit_init_breadcrumb(rq); 1547 if (err) 1548 goto skip_request; 1549 } 1550 1551 err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); 1552 if (err) 1553 goto skip_request; 1554 1555 i915_vma_unpin(vma); 1556 1557 i915_request_add(rq); 1558 1559 goto out_vm; 1560 skip_request: 1561 i915_request_set_error_once(rq, err); 1562 i915_request_add(rq); 1563 err_unpin: 1564 i915_vma_unpin(vma); 1565 out_vm: 1566 i915_vm_put(vm); 1567 1568 if (!err) 1569 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1570 1571 return err; 1572 } 1573 1574 static int read_from_scratch(struct i915_gem_context *ctx, 1575 struct intel_engine_cs *engine, 1576 struct drm_i915_gem_object *obj, 1577 u64 offset, u32 *value) 1578 { 1579 struct drm_i915_private *i915 = ctx->i915; 1580 struct i915_address_space *vm; 1581 const u32 result = 0x100; 1582 struct i915_request *rq; 1583 struct i915_vma *vma; 1584 unsigned int flags; 1585 u32 *cmd; 1586 int err; 1587 1588 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1589 1590 err = check_scratch(ctx->vm, offset); 1591 if (err) 1592 return err; 1593 1594 if (GRAPHICS_VER(i915) >= 8) { 1595 const u32 GPR0 = engine->mmio_base + 0x600; 1596 1597 vm = i915_gem_context_get_eb_vm(ctx); 1598 vma = i915_vma_instance(obj, vm, NULL); 1599 if (IS_ERR(vma)) { 1600 err = PTR_ERR(vma); 1601 goto out_vm; 1602 } 1603 1604 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1605 if (err) 1606 goto out_vm; 1607 1608 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1609 if (IS_ERR(cmd)) { 1610 err = PTR_ERR(cmd); 1611 goto err_unpin; 1612 } 1613 1614 memset(cmd, POISON_INUSE, PAGE_SIZE); 1615 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; 1616 *cmd++ = GPR0; 1617 *cmd++ = lower_32_bits(offset); 1618 *cmd++ = upper_32_bits(offset); 1619 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 1620 *cmd++ = GPR0; 1621 *cmd++ = result; 1622 *cmd++ = 0; 1623 *cmd = MI_BATCH_BUFFER_END; 1624 1625 i915_gem_object_flush_map(obj); 1626 i915_gem_object_unpin_map(obj); 1627 1628 flags = 0; 1629 } else { 1630 const u32 reg = engine->mmio_base + 0x420; 1631 1632 /* hsw: register access even to 3DPRIM! is protected */ 1633 vm = i915_vm_get(&engine->gt->ggtt->vm); 1634 vma = i915_vma_instance(obj, vm, NULL); 1635 if (IS_ERR(vma)) { 1636 err = PTR_ERR(vma); 1637 goto out_vm; 1638 } 1639 1640 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1641 if (err) 1642 goto out_vm; 1643 1644 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1645 if (IS_ERR(cmd)) { 1646 err = PTR_ERR(cmd); 1647 goto err_unpin; 1648 } 1649 1650 memset(cmd, POISON_INUSE, PAGE_SIZE); 1651 *cmd++ = MI_LOAD_REGISTER_MEM; 1652 *cmd++ = reg; 1653 *cmd++ = offset; 1654 *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT; 1655 *cmd++ = reg; 1656 *cmd++ = vma->node.start + result; 1657 *cmd = MI_BATCH_BUFFER_END; 1658 1659 i915_gem_object_flush_map(obj); 1660 i915_gem_object_unpin_map(obj); 1661 1662 flags = I915_DISPATCH_SECURE; 1663 } 1664 1665 intel_gt_chipset_flush(engine->gt); 1666 1667 rq = igt_request_alloc(ctx, engine); 1668 if (IS_ERR(rq)) { 1669 err = PTR_ERR(rq); 1670 goto err_unpin; 1671 } 1672 1673 i915_vma_lock(vma); 1674 err = i915_request_await_object(rq, vma->obj, true); 1675 if (err == 0) 1676 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 1677 i915_vma_unlock(vma); 1678 if (err) 1679 goto skip_request; 1680 1681 if (rq->engine->emit_init_breadcrumb) { 1682 err = rq->engine->emit_init_breadcrumb(rq); 1683 if (err) 1684 goto skip_request; 1685 } 1686 1687 err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags); 1688 if (err) 1689 goto skip_request; 1690 1691 i915_vma_unpin(vma); 1692 1693 i915_request_add(rq); 1694 1695 i915_gem_object_lock(obj, NULL); 1696 err = i915_gem_object_set_to_cpu_domain(obj, false); 1697 i915_gem_object_unlock(obj); 1698 if (err) 1699 goto out_vm; 1700 1701 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1702 if (IS_ERR(cmd)) { 1703 err = PTR_ERR(cmd); 1704 goto out_vm; 1705 } 1706 1707 *value = cmd[result / sizeof(*cmd)]; 1708 i915_gem_object_unpin_map(obj); 1709 1710 goto out_vm; 1711 skip_request: 1712 i915_request_set_error_once(rq, err); 1713 i915_request_add(rq); 1714 err_unpin: 1715 i915_vma_unpin(vma); 1716 out_vm: 1717 i915_vm_put(vm); 1718 1719 if (!err) 1720 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1721 1722 return err; 1723 } 1724 1725 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) 1726 { 1727 struct i915_address_space *vm; 1728 u32 *vaddr; 1729 int err = 0; 1730 1731 vm = ctx->vm; 1732 if (!vm) 1733 return -ENODEV; 1734 1735 if (!vm->scratch[0]) { 1736 pr_err("No scratch page!\n"); 1737 return -EINVAL; 1738 } 1739 1740 vaddr = __px_vaddr(vm->scratch[0]); 1741 1742 memcpy(out, vaddr, sizeof(*out)); 1743 if (memchr_inv(vaddr, *out, PAGE_SIZE)) { 1744 pr_err("Inconsistent initial state of scratch page!\n"); 1745 err = -EINVAL; 1746 } 1747 1748 return err; 1749 } 1750 1751 static int igt_vm_isolation(void *arg) 1752 { 1753 struct drm_i915_private *i915 = arg; 1754 struct i915_gem_context *ctx_a, *ctx_b; 1755 struct drm_i915_gem_object *obj_a, *obj_b; 1756 unsigned long num_engines, count; 1757 struct intel_engine_cs *engine; 1758 struct igt_live_test t; 1759 I915_RND_STATE(prng); 1760 struct file *file; 1761 u64 vm_total; 1762 u32 expected; 1763 int err; 1764 1765 if (GRAPHICS_VER(i915) < 7) 1766 return 0; 1767 1768 /* 1769 * The simple goal here is that a write into one context is not 1770 * observed in a second (separate page tables and scratch). 1771 */ 1772 1773 file = mock_file(i915); 1774 if (IS_ERR(file)) 1775 return PTR_ERR(file); 1776 1777 err = igt_live_test_begin(&t, i915, __func__, ""); 1778 if (err) 1779 goto out_file; 1780 1781 ctx_a = live_context(i915, file); 1782 if (IS_ERR(ctx_a)) { 1783 err = PTR_ERR(ctx_a); 1784 goto out_file; 1785 } 1786 1787 ctx_b = live_context(i915, file); 1788 if (IS_ERR(ctx_b)) { 1789 err = PTR_ERR(ctx_b); 1790 goto out_file; 1791 } 1792 1793 /* We can only test vm isolation, if the vm are distinct */ 1794 if (ctx_a->vm == ctx_b->vm) 1795 goto out_file; 1796 1797 /* Read the initial state of the scratch page */ 1798 err = check_scratch_page(ctx_a, &expected); 1799 if (err) 1800 goto out_file; 1801 1802 err = check_scratch_page(ctx_b, &expected); 1803 if (err) 1804 goto out_file; 1805 1806 vm_total = ctx_a->vm->total; 1807 GEM_BUG_ON(ctx_b->vm->total != vm_total); 1808 1809 obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE); 1810 if (IS_ERR(obj_a)) { 1811 err = PTR_ERR(obj_a); 1812 goto out_file; 1813 } 1814 1815 obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE); 1816 if (IS_ERR(obj_b)) { 1817 err = PTR_ERR(obj_b); 1818 goto put_a; 1819 } 1820 1821 count = 0; 1822 num_engines = 0; 1823 for_each_uabi_engine(engine, i915) { 1824 IGT_TIMEOUT(end_time); 1825 unsigned long this = 0; 1826 1827 if (!intel_engine_can_store_dword(engine)) 1828 continue; 1829 1830 /* Not all engines have their own GPR! */ 1831 if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS) 1832 continue; 1833 1834 while (!__igt_timeout(end_time, NULL)) { 1835 u32 value = 0xc5c5c5c5; 1836 u64 offset; 1837 1838 /* Leave enough space at offset 0 for the batch */ 1839 offset = igt_random_offset(&prng, 1840 I915_GTT_PAGE_SIZE, vm_total, 1841 sizeof(u32), alignof_dword); 1842 1843 err = write_to_scratch(ctx_a, engine, obj_a, 1844 offset, 0xdeadbeef); 1845 if (err == 0) 1846 err = read_from_scratch(ctx_b, engine, obj_b, 1847 offset, &value); 1848 if (err) 1849 goto put_b; 1850 1851 if (value != expected) { 1852 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", 1853 engine->name, value, 1854 upper_32_bits(offset), 1855 lower_32_bits(offset), 1856 this); 1857 err = -EINVAL; 1858 goto put_b; 1859 } 1860 1861 this++; 1862 } 1863 count += this; 1864 num_engines++; 1865 } 1866 pr_info("Checked %lu scratch offsets across %lu engines\n", 1867 count, num_engines); 1868 1869 put_b: 1870 i915_gem_object_put(obj_b); 1871 put_a: 1872 i915_gem_object_put(obj_a); 1873 out_file: 1874 if (igt_live_test_end(&t)) 1875 err = -EIO; 1876 fput(file); 1877 return err; 1878 } 1879 1880 int i915_gem_context_live_selftests(struct drm_i915_private *i915) 1881 { 1882 static const struct i915_subtest tests[] = { 1883 SUBTEST(live_nop_switch), 1884 SUBTEST(live_parallel_switch), 1885 SUBTEST(igt_ctx_exec), 1886 SUBTEST(igt_ctx_readonly), 1887 SUBTEST(igt_ctx_sseu), 1888 SUBTEST(igt_shared_ctx_exec), 1889 SUBTEST(igt_vm_isolation), 1890 }; 1891 1892 if (intel_gt_is_wedged(to_gt(i915))) 1893 return 0; 1894 1895 return i915_live_subtests(tests, i915); 1896 } 1897