// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

/*
 * Live and perf selftests for the i915 blitter-based migrate/clear engine.
 *
 * NOTE(review): this file references CHUNK_SZ, emit_pte(), emit_no_arbitration(),
 * emit_copy_ccs(), sg_sgt(), struct sgt_dma, GET_CCS_BYTES and
 * I915_EMIT_PTE_NUM_DWORDS without declaring or including them, so it is
 * presumably #included into the migrate implementation TU rather than built
 * standalone — confirm against the build before moving code around.
 */

#include <linux/sort.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_spinner.h"
#include "selftests/i915_random.h"

/*
 * Transfer sizes exercised by the live copy/clear tests.  The three entries
 * around CHUNK_SZ deliberately straddle the migrate engine's per-chunk
 * granularity (one chunk minus a page, exactly one chunk, one chunk plus a
 * page) to hit the chunking boundaries.
 */
static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

/*
 * Prefer a local-memory (lmem) object; fall back to an internal (system
 * memory) object on platforms/configs where lmem creation fails.
 */
static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

/*
 * Common driver for the copy tests: fill @src with an ascending pattern and
 * @dst with its complement, run @fn (either the ww-aware intel_migrate_copy
 * or the global-context intel_context_migrate_copy wrapper) to blit src into
 * dst, then spot-check one random u32 per page of dst against the expected
 * pattern.
 *
 * Returns 0 on success or skip (object creation failure is treated as a
 * skip), negative errno on failure.
 */
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	/* The object may have been rounded up; test the actual backing size. */
	sz = src->base.size;
	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;	/* err is still 0: treated as a skip */

	/*
	 * ww transaction: "continue" re-evaluates the loop macro, which
	 * retries the whole body on -EDEADLK and otherwise exits with err.
	 */
	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		/* Source pattern: vaddr[i] == i */
		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		/* Destination pre-filled with the complement, ~i */
		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;	/* success: exit the ww loop */

		/* -EDEADLK/-EINTR/-ERESTARTSYS are expected retry/interrupt codes */
		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	/*
	 * vaddr still points at dst's WC mapping (the last pin_map above).
	 * Sample one random u32 in each page (1024 == PAGE_SIZE / sizeof(u32))
	 * and expect the source pattern to have landed there.
	 */
	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}

/*
 * Emit a copy between an object's main surface and its flat-CCS (compression
 * control) surface, CHUNK_SZ at a time.  @write_to_ccs selects the direction:
 * true maps src=DIRECT_ACCESS (main memory) / dst=INDIRECT_ACCESS (CCS),
 * false the reverse.  The last request emitted is returned in @out (a
 * reference is taken; caller must wait and put).
 */
static int intel_context_copy_ccs(struct intel_context *ce,
				  const struct i915_deps *deps,
				  struct scatterlist *sg,
				  unsigned int pat_index,
				  bool write_to_ccs,
				  struct i915_request **out)
{
	u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
	u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
	struct sgt_dma it = sg_sgt(sg);
	struct i915_request *rq;
	u32 offset;
	int err;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
	*out = NULL;

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	/*
	 * On 64K-page platforms the migrate vm reserves the first chunk, so
	 * PTE/copy windows start at CHUNK_SZ instead of 0.
	 */
	offset = 0;
	if (HAS_64K_PAGES(ce->engine->i915))
		offset = CHUNK_SZ;

	do {
		int len;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ce;
		}

		/* External dependencies only apply to the first request. */
		if (deps) {
			err = i915_request_await_deps(rq, deps);
			if (err)
				goto out_rq;

			if (rq->engine->emit_init_breadcrumb) {
				err = rq->engine->emit_init_breadcrumb(rq);
				if (err)
					goto out_rq;
			}

			deps = NULL;
		}

		/* The PTE updates + clear must not be interrupted. */
		err = emit_no_arbitration(rq);
		if (err)
			goto out_rq;

		len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
		if (len <= 0) {
			err = len;
			goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
		if (err)
			goto out_rq;

		err = emit_copy_ccs(rq, offset, dst_access,
				    offset, src_access, len);
		if (err)
			goto out_rq;

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

		/* Arbitration is re-enabled between requests. */
out_rq:
		/* Always submit, even on error; keep a ref only on the last rq. */
		if (*out)
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);
		if (err || !it.sg || !sg_dma_len(it.sg))
			break;

		cond_resched();
	} while (1);

out_ce:
	return err;
}

/*
 * ww-aware wrapper around intel_context_copy_ccs(): pin a fresh migrate
 * context (falling back to the shared global one) under @ww and run the CCS
 * copy over @sg.
 */
static int
intel_migrate_ccs_copy(struct intel_migrate *m,
		       struct i915_gem_ww_ctx *ww,
		       const struct i915_deps *deps,
		       struct scatterlist *sg,
		       unsigned int pat_index,
		       bool write_to_ccs,
		       struct i915_request **out)
{
	struct intel_context *ce;
	int err;

	*out = NULL;
	if (!m->context)
		return -ENODEV;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);	/* fall back to global */
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);
	if (err)
		goto out;

	err = intel_context_copy_ccs(ce, deps, sg, pat_index,
				     write_to_ccs, out);

	intel_context_unpin(ce);
out:
	intel_context_put(ce);
	return err;
}

/*
 * Common driver for the clear tests: fill the object with ~i, run @fn to
 * clear it to @val (always 0 here), and spot-check one random u32 per page.
 * On flat-CCS-capable lmem it additionally dirties the CCS surface before
 * the clear and reads it back afterwards to verify the clear also zeroed
 * the compression metadata.
 *
 * Returns 0 on success or skip, negative errno on failure.
 */
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq = NULL;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr, val = 0;
	bool ccs_cap = false;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;	/* treated as a skip */

	/* Consider the rounded up memory too */
	sz = obj->base.size;

	/* CCS round-trip only makes sense for compressible lmem backing. */
	if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
		ccs_cap = true;

	for_i915_gem_ww(&ww, err, true) {
		int ccs_bytes, ccs_bytes_per_chunk;

		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		/* Pre-fill with a non-@val pattern so a no-op clear is caught. */
		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		if (ccs_cap && !val) {
			/* Write the obj data into ccs surface */
			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     true, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;
		}

		err = fn(migrate, &ww, obj, val, &rq);
		if (rq && !err) {
			if (i915_request_wait(rq, 0, HZ) < 0) {
				pr_err("%ps timed out, size: %u\n", fn, sz);
				err = -ETIME;
			}
			i915_request_put(rq);
			rq = NULL;
		}
		if (err)
			continue;

		i915_gem_object_flush_map(obj);

		/* Verify the set/clear of the obj mem */
		for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
			/* One random u32 per page; 1024 == PAGE_SIZE / sizeof(u32) */
			int x = i * 1024 +
				i915_prandom_u32_max_state(1024, prng);

			if (vaddr[x] != val) {
				pr_err("%ps failed, (%u != %u), offset: %zu\n",
				       fn, vaddr[x], val, x * sizeof(u32));
				igt_hexdump(vaddr + i * 1024, 4096);
				err = -EINVAL;
			}
		}
		if (err)
			continue;

		if (ccs_cap && !val) {
			/* Re-dirty main memory, then read the CCS surface back
			 * into it so we can inspect the metadata from the CPU.
			 */
			for (i = 0; i < sz / sizeof(u32); i++)
				vaddr[i] = ~i;
			i915_gem_object_flush_map(obj);

			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     false, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;

			ccs_bytes = GET_CCS_BYTES(i915, sz);
			ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
			i915_gem_object_flush_map(obj);

			/*
			 * The CCS data for each CHUNK_SZ of main surface was
			 * written back at the start of the corresponding
			 * chunk, hence the offset arithmetic below.  A
			 * cleared surface must have all-zero CCS metadata.
			 */
			for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
				int offset = ((i * PAGE_SIZE) /
					ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
				int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
				int x = i915_prandom_u32_max_state(min_t(int, 1024,
									 ccs_bytes_left), prng);

				if (vaddr[offset + x]) {
					pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
					       fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
					igt_hexdump(vaddr + offset,
						    min_t(int, 4096,
							  ccs_bytes_left * sizeof(u32)));
					err = -EINVAL;
				}
			}

			if (err)
				continue;
		}
		i915_gem_object_unpin_map(obj);
	}

	if (err) {
		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		/* On -EINVAL the rq was already waited on and put above. */
		if (rq && err != -EINVAL) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}

		/* Error paths "continue" out of the ww loop before unpinning. */
		i915_gem_object_unpin_map(obj);
	}

	i915_gem_object_put(obj);
	return err;
}

/* copy() callback: ww-aware migrate copy through the intel_migrate layer. */
static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww,  NULL,
				  src->mm.pages->sgl, src->pat_index,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->pat_index,
				  i915_gem_object_is_lmem(dst),
				  out);
}

/* copy() callback: direct copy on the shared global migrate context (ww unused). */
static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->pat_index,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->pat_index,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

/* clear() callback: ww-aware migrate clear through the intel_migrate layer. */
static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->pat_index,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

/* clear() callback: direct clear on the shared global migrate context (ww unused). */
static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->pat_index,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}

/* Live subtest: run both copy variants over every entry in sizes[]. */
static int live_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		/* Release freed objects between sizes to bound memory use. */
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

/* Live subtest: run both clear variants over every entry in sizes[]. */
static int live_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

/* Spinner paired with a timer that will terminate it asynchronously. */
struct spinner_timer {
	struct timer_list timer;
	struct igt_spinner spin;
};

/* Timer callback: end the spinner so blocked ring-space waiters can proceed. */
static void spinner_kill(struct timer_list *timer)
{
	struct spinner_timer *st = timer_container_of(st, timer, timer);

	igt_spinner_end(&st->spin);
	pr_info("%s\n", __func__);
}

/*
 * Regression test: emit_pte() on a nearly-full ring must wait for space
 * rather than trample rq->reserved_space.  A spinner blocks the engine, the
 * ring is padded with MI_NOOPs until barely any space remains, then
 * emit_pte() is called while a timer kills the spinner after 2s.
 */
static int live_emit_pte_full_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct intel_context *ce;
	struct i915_request *rq, *prev;
	struct spinner_timer st;
	struct sgt_dma it;
	int len, sz, err;
	u32 *cs;

	/*
	 * Simple regression test to check that we don't trample the
	 * rq->reserved_space when returning from emit_pte(), if the ring is
	 * nearly full.
	 */

	if (igt_spinner_init(&st.spin, to_gt(i915)))
		return -ENOMEM;

	/* Two pages: emit_pte() must need more than the minimal dword budget. */
	obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_spinner;
	}

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_obj;

	ce = intel_migrate_create_context(migrate);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_obj;
	}

	ce->ring_size = SZ_4K; /* Not too big */

	err = intel_context_pin(ce);
	if (err)
		goto out_put;

	rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	i915_request_add(rq);
	if (!igt_wait_for_spinner(&st.spin, rq)) {
		err = -EIO;
		goto out_unpin;
	}

	/*
	 * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
	 * ring->reserved_space at the end. To actually emit the PTEs we require
	 * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
	 * greater than PAGE_SIZE. The correct behaviour is to wait for more
	 * ring space in emit_pte(), otherwise we trample on the reserved_space
	 * resulting in crashes when later submitting the rq.
	 */

	prev = NULL;
	do {
		/* Submit the previous iteration's padding request first. */
		if (prev)
			i915_request_add(rq);

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_unpin;
		}

		/* Pad in <=1K steps so the final gap lands where we want it. */
		sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
			I915_EMIT_PTE_NUM_DWORDS;
		sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
			   I915_EMIT_PTE_NUM_DWORDS);
		cs = intel_ring_begin(rq, sz);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			goto out_rq;
		}

		memset32(cs, MI_NOOP, sz);
		cs += sz;
		intel_ring_advance(rq, cs);

		pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);

		prev = rq;
	} while (rq->ring->space > (rq->reserved_space +
				    I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));

	/* Arm the watchdog that releases the engine while emit_pte() waits. */
	timer_setup_on_stack(&st.timer, spinner_kill, 0);
	mod_timer(&st.timer, jiffies + 2 * HZ);

	/*
	 * This should wait for the spinner to be killed, otherwise we should go
	 * down in flames when doing i915_request_add().
	 */
	pr_info("%s emite_pte ring space=%u\n", __func__, rq->ring->space);
	it = sg_sgt(obj->mm.pages->sgl);
	len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
	if (!len) {
		err = -EINVAL;
		goto out_rq;
	}
	if (len < 0) {
		err = len;
		goto out_rq;
	}

out_rq:
	i915_request_add(rq);	/* GEM_BUG_ON(rq->reserved_space > ring->space)? */
	timer_delete_sync(&st.timer);
	timer_destroy_on_stack(&st.timer);
out_unpin:
	intel_context_unpin(ce);
out_put:
	intel_context_put(ce);
out_obj:
	i915_gem_object_put(obj);
out_spinner:
	igt_spinner_fini(&st.spin);
	return err;
}

/* Per-thread argument bundle for the threaded_migrate() stress tests. */
struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

/*
 * Run @fn concurrently on num_online_cpus() + 1 kthreads (oversubscribed by
 * one on purpose), each with its own PRNG stream, then stop them all and
 * report the first failure.  @flags is currently unused by all callers.
 */
static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;	/* allocation failure treated as a skip */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	/*
	 * Start all threads before we kthread_stop().
	 * In CHV / BXT+VTD environments, where VMA pinning is committed
	 * asynchronously, empirically determined 100ms delay is needed
	 * to avoid stopping threads that may still wait for completion of
	 * intel_ggtt_bind_vma and fail with -ERESTARTSYS when interrupted.
	 */
	msleep((intel_vm_no_concurrent_access_wa(migrate->context->vm->i915) ?
		100 : 10) * n_cpus);

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop_put(tsk);
		if (status && !err)
			err = status;	/* keep the first error only */
	}

	kfree(thread);
	return err;
}

/* Thread body: 2-chunk ww-aware copy with the thread's private PRNG. */
static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_copy, 0);
}

/* Thread body: 2-chunk copy on the shared global context. */
static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_copy, 0);
}

/* Thread body: 2-chunk ww-aware clear. */
static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

/* Thread body: 2-chunk clear on the shared global context. */
static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_clear, 0);
}

/* Entry point for the live selftest group; skipped without a migrate context. */
int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(live_emit_pte_full_ring),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}

/*
 * Create an object (lmem if @try_lmem and available, else internal), lock it
 * and pin its pages.  Returned objects are left locked + pinned; callers
 * unlock and put them.  NOTE(review): the i915_gem_object_trylock() result
 * is not checked — presumably it cannot contend on a freshly created object;
 * confirm before reusing this helper elsewhere.
 */
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

/* sort() comparator for ktime_t samples (ascending). */
static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

/*
 * Time 5 synchronous clears of @sz bytes and report throughput.  The samples
 * are sorted and the extremes discarded: t[1] + 2*t[2] + t[3] weights four
 * sample-durations around the median, matching the 4*sz numerator.
 */
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    unsigned int pat_index,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

/* Perf subtest: clear throughput at several sizes, preferring lmem objects. */
static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       i915_gem_get_pat_index(gt->i915,
							      I915_CACHE_NONE),
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

/*
 * Time 5 synchronous copies of @sz bytes and report throughput, using the
 * same trimmed-sample weighting as __perf_clear_blt().
 */
static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   unsigned int src_pat_index,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   unsigned int dst_pat_index,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_pat_index,
						 src_is_lmem,
						 dst, dst_pat_index,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

/* Perf subtest: lmem -> internal copy throughput at several sizes. */
static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		size_t sz;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		/* Match dst to src's possibly rounded-up backing size. */
		sz = src->base.size;
		dst = create_init_lmem_internal(gt, sz, false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(dst),
				      sz);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

/* Entry point for the perf selftest group; skipped when wedged or no context. */
int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}