// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/prime_numbers.h>
#include <linux/sort.h>

#include <drm/drm_buddy.h>

#include "../i915_selftest.h"

#include "mock_drm.h"
#include "mock_gem_device.h"
#include "mock_region.h"

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h"
#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_migrate.h"
#include "i915_memcpy.h"
#include "i915_ttm_buddy_manager.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

static void close_objects(struct intel_memory_region *mem,
			  struct list_head *objects)
{
	struct drm_i915_private *i915 = mem->i915;
	struct drm_i915_gem_object *obj, *on;

	list_for_each_entry_safe(obj, on, objects, st_link) {
		i915_gem_object_lock(obj, NULL);
		if (i915_gem_object_has_pinned_pages(obj))
			i915_gem_object_unpin_pages(obj);
		/* No polluting the memory region between tests */
		__i915_gem_object_put_pages(obj);
		i915_gem_object_unlock(obj);
		list_del(&obj->st_link);
		i915_gem_object_put(obj);
	}

	cond_resched();

	i915_gem_drain_freed_objects(i915);
}

static int igt_mock_fill(void *arg)
{
	struct intel_memory_region *mem = arg;
	resource_size_t total = resource_size(&mem->region);
	resource_size_t page_size;
	resource_size_t rem;
	unsigned long max_pages;
	unsigned long page_num;
	LIST_HEAD(objects);
	int err = 0;

	page_size = PAGE_SIZE;
	max_pages = div64_u64(total, page_size);
	rem = total;

	for_each_prime_number_from(page_num, 1, max_pages) {
		resource_size_t size = page_num * page_size;
		struct drm_i915_gem_object *obj;

		obj = i915_gem_object_create_region(mem, size, 0, 0);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			break;
		}

		err = i915_gem_object_pin_pages_unlocked(obj);
		if (err) {
			i915_gem_object_put(obj);
			break;
		}

		list_add(&obj->st_link, &objects);
		rem -= size;
	}

	if (err == -ENOMEM)
		err = 0;
	if (err == -ENXIO) {
		if (page_num * page_size <= rem) {
			pr_err("%s failed, space still left in region\n",
			       __func__);
			err = -EINVAL;
		} else {
			err = 0;
		}
	}

	close_objects(mem, &objects);

	return err;
}

static struct drm_i915_gem_object *
igt_object_create(struct intel_memory_region *mem,
		  struct list_head *objects,
		  u64 size,
		  unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	int err;

	obj = i915_gem_object_create_region(mem, size, 0, flags);
	if (IS_ERR(obj))
		return obj;

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto put;

	list_add(&obj->st_link, objects);
	return obj;

put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static void igt_object_release(struct drm_i915_gem_object *obj)
{
	i915_gem_object_lock(obj, NULL);
	i915_gem_object_unpin_pages(obj);
	__i915_gem_object_put_pages(obj);
	i915_gem_object_unlock(obj);
	list_del(&obj->st_link);
	i915_gem_object_put(obj);
}

static bool is_contiguous(struct drm_i915_gem_object *obj)
{
	struct scatterlist *sg;
	dma_addr_t addr = -1;

	for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
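		/* Each element must start exactly where the previous one ended */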
		if (addr != -1 && sg_dma_address(sg) != addr)
			return false;

		addr = sg_dma_address(sg) + sg_dma_len(sg);
	}

	return true;
}

static int igt_mock_reserve(void *arg)
{
	struct intel_memory_region *mem = arg;
	struct drm_i915_private *i915 = mem->i915;
	resource_size_t avail = resource_size(&mem->region);
	struct drm_i915_gem_object *obj;
	const u32 chunk_size = SZ_32M;
	u32 i, offset, count, *order;
	u64 allocated, cur_avail;
	I915_RND_STATE(prng);
	LIST_HEAD(objects);
	int err = 0;

	count = avail / chunk_size;
	order = i915_random_order(count, &prng);
	if (!order)
		return 0;

	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
	if (IS_ERR(mem)) {
		pr_err("failed to create memory region\n");
		err = PTR_ERR(mem);
		goto out_free_order;
	}

	/* Reserve a bunch of ranges within the region */
	for (i = 0; i < count; ++i) {
		u64 start = order[i] * chunk_size;
		u64 size = i915_prandom_u32_max_state(chunk_size, &prng);

		/* Allow for some really big holes */
		if (!size)
			continue;

		size = round_up(size, PAGE_SIZE);
		offset = igt_random_offset(&prng, 0, chunk_size, size,
					   PAGE_SIZE);

		err = intel_memory_region_reserve(mem, start + offset, size);
		if (err) {
			pr_err("%s failed to reserve range", __func__);
			goto out_close;
		}

		/* XXX: maybe sanity check the block range here? */
		avail -= size;
	}

	/* Try to see if we can allocate from the remaining space */
	allocated = 0;
	cur_avail = avail;
	do {
		u32 size = i915_prandom_u32_max_state(cur_avail, &prng);

		size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE);
		obj = igt_object_create(mem, &objects, size, 0);
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -ENXIO)
				break;

			err = PTR_ERR(obj);
			goto out_close;
		}
		cur_avail -= size;
		allocated += size;
	} while (1);

	if (allocated != avail) {
		pr_err("%s mismatch between allocation and free space", __func__);
		err = -EINVAL;
	}

out_close:
	close_objects(mem, &objects);
	intel_memory_region_destroy(mem);
out_free_order:
	kfree(order);
	return err;
}

static int igt_mock_contiguous(void *arg)
{
	struct intel_memory_region *mem = arg;
	struct drm_i915_gem_object *obj;
	unsigned long n_objects;
	LIST_HEAD(objects);
	LIST_HEAD(holes);
	I915_RND_STATE(prng);
	resource_size_t total;
	resource_size_t min;
	u64 target;
	int err = 0;

	total = resource_size(&mem->region);

	/* Min size */
	obj = igt_object_create(mem, &objects, PAGE_SIZE,
				I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	if (!is_contiguous(obj)) {
		pr_err("%s min object spans disjoint sg entries\n", __func__);
		err = -EINVAL;
		goto err_close_objects;
	}

	igt_object_release(obj);

	/* Max size */
	obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	if (!is_contiguous(obj)) {
		pr_err("%s max object spans disjoint sg entries\n", __func__);
		err = -EINVAL;
		goto err_close_objects;
	}

	igt_object_release(obj);

	/* Internal fragmentation should not bleed into the object size */
	target = i915_prandom_u64_state(&prng);
	div64_u64_rem(target, total, &target);
	target = round_up(target, PAGE_SIZE);
	target = max_t(u64, PAGE_SIZE, target);
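
	/*
	 * Note: the buddy allocator may round the backing store up to a
	 * power-of-two block internally; the check below ensures any such
	 * padding is not reported back through obj->base.size.
	 */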
	obj = igt_object_create(mem, &objects, target,
				I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	if (obj->base.size != target) {
		pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__,
		       obj->base.size, target);
		err = -EINVAL;
		goto err_close_objects;
	}

	if (!is_contiguous(obj)) {
		pr_err("%s object spans disjoint sg entries\n", __func__);
		err = -EINVAL;
		goto err_close_objects;
	}

	igt_object_release(obj);

	/*
	 * Try to fragment the address space, such that half of it is free, but
	 * the max contiguous block size is SZ_64K.
	 */

	target = SZ_64K;
	n_objects = div64_u64(total, target);

	while (n_objects--) {
		struct list_head *list;

		if (n_objects % 2)
			list = &holes;
		else
			list = &objects;

		obj = igt_object_create(mem, list, target,
					I915_BO_ALLOC_CONTIGUOUS);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_close_objects;
		}
	}

	close_objects(mem, &holes);

	min = target;
	target = total >> 1;

	/* Make sure we can still allocate all the fragmented space */
	obj = igt_object_create(mem, &objects, target, 0);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_close_objects;
	}

	igt_object_release(obj);

	/*
	 * Even though we have enough free space, we don't have a big enough
	 * contiguous block. Make sure that holds true.
	 */

	do {
		bool should_fail = target > min;

		obj = igt_object_create(mem, &objects, target,
					I915_BO_ALLOC_CONTIGUOUS);
		if (should_fail != IS_ERR(obj)) {
			pr_err("%s target allocation(%llx) mismatch\n",
			       __func__, target);
			err = -EINVAL;
			goto err_close_objects;
		}

		target >>= 1;
	} while (target >= PAGE_SIZE);

err_close_objects:
	list_splice_tail(&holes, &objects);
	close_objects(mem, &objects);
	return err;
}

static int igt_mock_splintered_region(void *arg)
{
	struct intel_memory_region *mem = arg;
	struct drm_i915_private *i915 = mem->i915;
	struct i915_ttm_buddy_resource *res;
	struct drm_i915_gem_object *obj;
	struct drm_buddy *mm;
	unsigned int expected_order;
	LIST_HEAD(objects);
	u64 size;
	int err = 0;

	/*
	 * Sanity check we can still allocate everything even if the
	 * mm.max_order != mm.size, i.e. our starting address space size is not
	 * a power-of-two.
	 */

	size = (SZ_4G - 1) & PAGE_MASK;
	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
	if (IS_ERR(mem))
		return PTR_ERR(mem);

	obj = igt_object_create(mem, &objects, size, 0);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_close;
	}

	res = to_ttm_buddy_resource(obj->mm.res);
	mm = res->mm;
	if (mm->size != size) {
		pr_err("%s size mismatch(%llu != %llu)\n",
		       __func__, mm->size, size);
		err = -EINVAL;
		goto out_put;
	}

	expected_order = get_order(rounddown_pow_of_two(size));
	if (mm->max_order != expected_order) {
		pr_err("%s order mismatch(%u != %u)\n",
		       __func__, mm->max_order, expected_order);
		err = -EINVAL;
		goto out_put;
	}

	close_objects(mem, &objects);

	/*
	 * While we should be able to allocate everything without any flag
	 * restrictions, if we consider I915_BO_ALLOC_CONTIGUOUS then we are
	 * actually limited to the largest power-of-two for the region size,
	 * i.e. max_order, due to the inner workings of the buddy allocator. So
	 * make
So make 421 * sure that does indeed hold true. 422 */ 423 424 obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); 425 if (!IS_ERR(obj)) { 426 pr_err("%s too large contiguous allocation was not rejected\n", 427 __func__); 428 err = -EINVAL; 429 goto out_close; 430 } 431 432 obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size), 433 I915_BO_ALLOC_CONTIGUOUS); 434 if (IS_ERR(obj)) { 435 pr_err("%s largest possible contiguous allocation failed\n", 436 __func__); 437 err = PTR_ERR(obj); 438 goto out_close; 439 } 440 441 out_close: 442 close_objects(mem, &objects); 443 out_put: 444 intel_memory_region_destroy(mem); 445 return err; 446 } 447 448 #ifndef SZ_8G 449 #define SZ_8G BIT_ULL(33) 450 #endif 451 452 static int igt_mock_max_segment(void *arg) 453 { 454 struct intel_memory_region *mem = arg; 455 struct drm_i915_private *i915 = mem->i915; 456 struct i915_ttm_buddy_resource *res; 457 struct drm_i915_gem_object *obj; 458 struct drm_buddy_block *block; 459 struct drm_buddy *mm; 460 struct list_head *blocks; 461 struct scatterlist *sg; 462 I915_RND_STATE(prng); 463 LIST_HEAD(objects); 464 unsigned int max_segment; 465 unsigned int ps; 466 u64 size; 467 int err = 0; 468 469 /* 470 * While we may create very large contiguous blocks, we may need 471 * to break those down for consumption elsewhere. In particular, 472 * dma-mapping with scatterlist elements have an implicit limit of 473 * UINT_MAX on each element. 474 */ 475 476 size = SZ_8G; 477 ps = PAGE_SIZE; 478 if (i915_prandom_u64_state(&prng) & 1) 479 ps = SZ_64K; /* For something like DG2 */ 480 481 max_segment = round_down(UINT_MAX, ps); 482 483 mem = mock_region_create(i915, 0, size, ps, 0, 0); 484 if (IS_ERR(mem)) 485 return PTR_ERR(mem); 486 487 obj = igt_object_create(mem, &objects, size, 0); 488 if (IS_ERR(obj)) { 489 err = PTR_ERR(obj); 490 goto out_put; 491 } 492 493 res = to_ttm_buddy_resource(obj->mm.res); 494 blocks = &res->blocks; 495 mm = res->mm; 496 size = 0; 497 list_for_each_entry(block, blocks, link) { 498 if (drm_buddy_block_size(mm, block) > size) 499 size = drm_buddy_block_size(mm, block); 500 } 501 if (size < max_segment) { 502 pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n", 503 __func__, max_segment, size); 504 err = -EINVAL; 505 goto out_close; 506 } 507 508 for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { 509 dma_addr_t daddr = sg_dma_address(sg); 510 511 if (sg->length > max_segment) { 512 pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", 513 __func__, sg->length, max_segment); 514 err = -EINVAL; 515 goto out_close; 516 } 517 518 if (!IS_ALIGNED(daddr, ps)) { 519 pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n", 520 __func__, &daddr, ps); 521 err = -EINVAL; 522 goto out_close; 523 } 524 } 525 526 out_close: 527 close_objects(mem, &objects); 528 out_put: 529 intel_memory_region_destroy(mem); 530 return err; 531 } 532 533 static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj) 534 { 535 struct intel_memory_region *mr = obj->mm.region; 536 struct i915_ttm_buddy_resource *bman_res = 537 to_ttm_buddy_resource(obj->mm.res); 538 struct drm_buddy *mm = bman_res->mm; 539 struct drm_buddy_block *block; 540 u64 total; 541 542 total = 0; 543 list_for_each_entry(block, &bman_res->blocks, link) { 544 u64 start = drm_buddy_block_offset(block); 545 u64 end = start + drm_buddy_block_size(mm, block); 546 547 if (start < resource_size(&mr->io)) 548 total += min_t(u64, end, resource_size(&mr->io)) - start; 549 } 550 
	return total;
}

static int igt_mock_io_size(void *arg)
{
	struct intel_memory_region *mr = arg;
	struct drm_i915_private *i915 = mr->i915;
	struct drm_i915_gem_object *obj;
	u64 mappable_theft_total;
	u64 io_size;
	u64 total;
	u64 ps;
	u64 rem;
	u64 size;
	I915_RND_STATE(prng);
	LIST_HEAD(objects);
	int err = 0;

	ps = SZ_4K;
	if (i915_prandom_u64_state(&prng) & 1)
		ps = SZ_64K; /* For something like DG2 */

	div64_u64_rem(i915_prandom_u64_state(&prng), SZ_8G, &total);
	total = round_down(total, ps);
	total = max_t(u64, total, SZ_1G);

	div64_u64_rem(i915_prandom_u64_state(&prng), total - ps, &io_size);
	io_size = round_down(io_size, ps);
	io_size = max_t(u64, io_size, SZ_256M); /* 256M seems to be the common lower limit */

	pr_info("%s with ps=%llx, io_size=%llx, total=%llx\n",
		__func__, ps, io_size, total);

	mr = mock_region_create(i915, 0, total, ps, 0, io_size);
	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto out_err;
	}

	mappable_theft_total = 0;
	rem = total - io_size;
	do {
		div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
		size = round_down(size, ps);
		size = max(size, ps);

		obj = igt_object_create(mr, &objects, size,
					I915_BO_ALLOC_GPU_ONLY);
		if (IS_ERR(obj)) {
			pr_err("%s TOPDOWN failed with rem=%llx, size=%llx\n",
			       __func__, rem, size);
			err = PTR_ERR(obj);
			goto out_close;
		}

		mappable_theft_total += igt_object_mappable_total(obj);
		rem -= size;
	} while (rem);

	pr_info("%s mappable theft=(%lluMiB/%lluMiB), total=%lluMiB\n",
		__func__,
		(u64)mappable_theft_total >> 20,
		(u64)io_size >> 20,
		(u64)total >> 20);

	/*
	 * Even if we allocate all of the non-mappable portion, we should still
	 * be able to dip into the mappable portion.
	 */
	obj = igt_object_create(mr, &objects, io_size,
				I915_BO_ALLOC_GPU_ONLY);
	if (IS_ERR(obj)) {
		pr_err("%s allocation unexpectedly failed\n", __func__);
		err = PTR_ERR(obj);
		goto out_close;
	}

	close_objects(mr, &objects);

	rem = io_size;
	do {
		div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
		size = round_down(size, ps);
		size = max(size, ps);

		obj = igt_object_create(mr, &objects, size, 0);
		if (IS_ERR(obj)) {
			pr_err("%s MAPPABLE failed with rem=%llx, size=%llx\n",
			       __func__, rem, size);
			err = PTR_ERR(obj);
			goto out_close;
		}

		if (igt_object_mappable_total(obj) != size) {
			pr_err("%s allocation is not mappable(size=%llx)\n",
			       __func__, size);
			err = -EINVAL;
			goto out_close;
		}
		rem -= size;
	} while (rem);

	/*
	 * We assume CPU access is required by default, which should result in
	 * a failure here, even though the non-mappable portion is free.
	 */
	obj = igt_object_create(mr, &objects, ps, 0);
	if (!IS_ERR(obj)) {
		pr_err("%s allocation unexpectedly succeeded\n", __func__);
		err = -EINVAL;
		goto out_close;
	}

out_close:
	close_objects(mr, &objects);
	intel_memory_region_destroy(mr);
out_err:
	if (err == -ENOMEM)
		err = 0;

	return err;
}

static int igt_gpu_write_dw(struct intel_context *ce,
			    struct i915_vma *vma,
			    u32 dword,
			    u32 value)
{
	return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32),
			       vma->size >> PAGE_SHIFT, value);
}

static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
	unsigned long n = obj->base.size >> PAGE_SHIFT;
	u32 *ptr;
	int err;

	err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
	if (err)
		return err;

	ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	ptr += dword;
	while (n--) {
		if (*ptr != val) {
			pr_err("base[%u]=%08x, val=%08x\n",
			       dword, *ptr, val);
			err = -EINVAL;
			break;
		}

		ptr += PAGE_SIZE / sizeof(*ptr);
	}

	i915_gem_object_unpin_map(obj);
	return err;
}

static int igt_gpu_write(struct i915_gem_context *ctx,
			 struct drm_i915_gem_object *obj)
{
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct intel_context *ce;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	unsigned int count;
	struct i915_vma *vma;
	int *order;
	int i, n;
	int err = 0;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	n = 0;
	count = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		count++;
		if (!intel_engine_can_store_dword(ce->engine))
			continue;

		vm = ce->vm;
		n++;
	}
	i915_gem_context_unlock_engines(ctx);
	if (!n)
		return 0;

	order = i915_random_order(count * count, &prng);
	if (!order)
		return -ENOMEM;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_free;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out_free;

	i = 0;
	engines = i915_gem_context_lock_engines(ctx);
	do {
		u32 rng = prandom_u32_state(&prng);
		u32 dword = offset_in_page(rng) / 4;

		ce = engines->engines[order[i] % engines->num_engines];
		i = (i + 1) % (count * count);
		if (!ce || !intel_engine_can_store_dword(ce->engine))
			continue;

		err = igt_gpu_write_dw(ce, vma, dword, rng);
		if (err)
			break;

		i915_gem_object_lock(obj, NULL);
		err = igt_cpu_check(obj, dword, rng);
		i915_gem_object_unlock(obj);
		if (err)
			break;
	} while (!__igt_timeout(end_time, NULL));
	i915_gem_context_unlock_engines(ctx);

out_free:
	kfree(order);

	if (err == -ENOMEM)
		err = 0;

	return err;
}

static int igt_lmem_create(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	int err = 0;

	obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_put;

	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

static int igt_lmem_create_with_ps(void *arg)
{
	struct drm_i915_private *i915 = arg;
	int err = 0;
	u32 ps;

	for (ps = PAGE_SIZE; ps <= SZ_1G; ps <<= 1) {
		struct drm_i915_gem_object *obj;
		dma_addr_t daddr;

		obj = __i915_gem_object_create_lmem_with_ps(i915, ps, ps, 0);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			if (err == -ENXIO || err == -E2BIG) {
				pr_info("%s not enough lmem for ps(%u) err=%d\n",
					__func__, ps, err);
				err = 0;
			}

			break;
		}

		if (obj->base.size != ps) {
			pr_err("%s size(%zu) != ps(%u)\n",
			       __func__, obj->base.size, ps);
			err = -EINVAL;
			goto out_put;
		}

		i915_gem_object_lock(obj, NULL);
		err = i915_gem_object_pin_pages(obj);
		if (err) {
			if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
				pr_info("%s not enough lmem for ps(%u) err=%d\n",
					__func__, ps, err);
				err = 0;
			}
			goto out_put;
		}

		daddr = i915_gem_object_get_dma_address(obj, 0);
		if (!IS_ALIGNED(daddr, ps)) {
			pr_err("%s daddr(%pa) not aligned with ps(%u)\n",
			       __func__, &daddr, ps);
			err = -EINVAL;
			goto out_unpin;
		}

out_unpin:
		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj);
out_put:
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);

		if (err)
			break;
	}

	return err;
}

static int igt_lmem_create_cleared_cpu(void *arg)
{
	struct drm_i915_private *i915 = arg;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	u32 size, i;
	int err;

	i915_gem_drain_freed_objects(i915);

	size = max_t(u32, PAGE_SIZE, i915_prandom_u32_max_state(SZ_32M, &prng));
	size = round_up(size, PAGE_SIZE);
	i = 0;

	do {
		struct drm_i915_gem_object *obj;
		unsigned int flags;
		u32 dword, val;
		void *vaddr;

		/*
		 * Alternate between cleared and uncleared allocations, while
		 * also dirtying the pages each time to check that the pages
		 * are always cleared if requested; since we are the only user,
		 * we should get some overlap of the underlying pages, if not
		 * all.
		 */

		flags = I915_BO_ALLOC_CPU_CLEAR;
		if (i & 1)
			flags = 0;

		obj = i915_gem_object_create_lmem(i915, size, flags);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		i915_gem_object_lock(obj, NULL);
		err = i915_gem_object_pin_pages(obj);
		if (err)
			goto out_put;

		dword = i915_prandom_u32_max_state(PAGE_SIZE / sizeof(u32),
						   &prng);

		if (flags & I915_BO_ALLOC_CPU_CLEAR) {
			err = igt_cpu_check(obj, dword, 0);
			if (err) {
				pr_err("%s failed with size=%u, flags=%u\n",
				       __func__, size, flags);
				goto out_unpin;
			}
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto out_unpin;
		}

		val = prandom_u32_state(&prng);

		memset32(vaddr, val, obj->base.size / sizeof(u32));

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);
out_unpin:
		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj);
out_put:
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);

		if (err)
			break;
		++i;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s completed (%u) iterations\n", __func__, i);

	return err;
}

static int igt_lmem_write_gpu(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct file *file;
	I915_RND_STATE(prng);
	u32 sz;
	int err;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);

	obj = i915_gem_object_create_lmem(i915, sz, 0);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_file;
	}

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_put;

	err = igt_gpu_write(ctx, obj);
	if (err)
		pr_err("igt_gpu_write failed(%d)\n", err);

	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);
out_file:
	fput(file);
	return err;
}

static struct intel_engine_cs *
random_engine_class(struct drm_i915_private *i915,
		    unsigned int class,
		    struct rnd_state *prng)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for (engine = intel_engine_lookup_user(i915, class, 0);
	     engine && engine->uabi_class == class;
	     engine = rb_entry_safe(rb_next(&engine->uabi_node),
				    typeof(*engine), uabi_node))
		count++;

	count = i915_prandom_u32_max_state(count, prng);
	return intel_engine_lookup_user(i915, class, count);
}

static int igt_lmem_write_cpu(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	u32 bytes[] = {
		0, /* rng placeholder */
		sizeof(u32),
		sizeof(u64),
		64, /* cl */
		PAGE_SIZE,
		PAGE_SIZE - sizeof(u32),
		PAGE_SIZE - sizeof(u64),
		PAGE_SIZE - 64,
	};
	struct intel_engine_cs *engine;
	struct i915_request *rq;
	u32 *vaddr;
	u32 sz;
	u32 i;
	int *order;
	int count;
	int err;

	engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng);
	if (!engine)
		return 0;

	pr_info("%s: using %s\n", __func__, engine->name);

	sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);
	sz = max_t(u32, 2 * PAGE_SIZE, sz);

	obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_put;
	}

	i915_gem_object_lock(obj, NULL);

	err = dma_resv_reserve_fences(obj->base.resv, 1);
	if (err) {
		i915_gem_object_unlock(obj);
		goto out_put;
	}

	/* Put the pages into a known state -- from the gpu for added fun */
	intel_engine_pm_get(engine);
	err = intel_context_migrate_clear(engine->gt->migrate.context, NULL,
					  obj->mm.pages->sgl,
					  i915_gem_get_pat_index(i915,
								 I915_CACHE_NONE),
					  true, 0xdeadbeaf, &rq);
	if (rq) {
		dma_resv_add_fence(obj->base.resv, &rq->fence,
				   DMA_RESV_USAGE_WRITE);
		i915_request_put(rq);
	}

	intel_engine_pm_put(engine);
	if (!err)
		err = i915_gem_object_set_to_wc_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		goto out_unpin;

	count = ARRAY_SIZE(bytes);
	order = i915_random_order(count * count, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_unpin;
	}

	/* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */
	bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32));
	GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32)));

	i = 0;
	do {
		u32 offset;
		u32 align;
		u32 dword;
		u32 size;
		u32 val;

		size = bytes[order[i] % count];
		i = (i + 1) % (count * count);

		align = bytes[order[i] % count];
		i = (i + 1) % (count * count);

		align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align));

		offset = igt_random_offset(&prng, 0, obj->base.size,
					   size, align);

		val = prandom_u32_state(&prng);
		memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf,
			 size / sizeof(u32));

		/*
		 * Sample random dw -- don't waste precious time reading every
		 * single dw.
		 */
		dword = igt_random_offset(&prng, offset,
					  offset + size,
					  sizeof(u32), sizeof(u32));
		dword /= sizeof(u32);
		if (vaddr[dword] != (val ^ 0xdeadbeaf)) {
			pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n",
			       __func__, dword, vaddr[dword], val ^ 0xdeadbeaf,
			       size, align, offset);
			err = -EINVAL;
			break;
		}
	} while (!__igt_timeout(end_time, NULL));

out_unpin:
	i915_gem_object_unpin_map(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

static const char *repr_type(u32 type)
{
	switch (type) {
	case I915_MAP_WB:
		return "WB";
	case I915_MAP_WC:
		return "WC";
	}

	return "";
}

static struct drm_i915_gem_object *
create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type,
			  void **out_addr)
{
	struct drm_i915_gem_object *obj;
	void *addr;

	obj = i915_gem_object_create_region(mr, size, 0, 0);
	if (IS_ERR(obj)) {
		if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */
			return ERR_PTR(-ENODEV);
		return obj;
	}

	addr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(addr)) {
		i915_gem_object_put(obj);
		if (PTR_ERR(addr) == -ENXIO)
			return ERR_PTR(-ENODEV);
		return addr;
	}

	*out_addr = addr;
	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

static void igt_memcpy_long(void *dst, const void *src, size_t size)
{
	unsigned long *tmp = dst;
	const unsigned long *s = src;

	size = size / sizeof(unsigned long);
	while (size--)
		*tmp++ = *s++;
}

static inline void igt_memcpy(void *dst, const void *src, size_t size)
{
	memcpy(dst, src, size);
}

static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size)
{
	i915_memcpy_from_wc(dst, src, size);
}

static int _perf_memcpy(struct intel_memory_region *src_mr,
			struct intel_memory_region *dst_mr,
			u64 size, u32 src_type, u32 dst_type)
{
	struct drm_i915_private *i915 = src_mr->i915;
	const struct {
		const char *name;
		void (*copy)(void *dst, const void *src, size_t size);
		bool skip;
	} tests[] = {
		{
			"memcpy",
			igt_memcpy,
		},
		{
			"memcpy_long",
			igt_memcpy_long,
		},
		{
			"memcpy_from_wc",
			igt_memcpy_from_wc,
			!i915_has_memcpy_from_wc(),
		},
	};
	struct drm_i915_gem_object *src, *dst;
	void *src_addr, *dst_addr;
	int ret = 0;
	int i;

	src = create_region_for_mapping(src_mr, size, src_type, &src_addr);
	if (IS_ERR(src)) {
		ret = PTR_ERR(src);
		goto out;
	}

	dst = create_region_for_mapping(dst_mr, size, dst_type, &dst_addr);
	if (IS_ERR(dst)) {
		ret = PTR_ERR(dst);
		goto out_unpin_src;
	}

	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
		ktime_t t[5];
		int pass;

		if (tests[i].skip)
			continue;

		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			ktime_t t0, t1;

			t0 = ktime_get();

			tests[i].copy(dst_addr, src_addr, size);

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		if (t[0] <= 0) {
			/* ignore the impossible to protect our sanity */
			pr_debug("Skipping %s src(%s, %s) -> dst(%s, %s) %14s %4lluKiB copy, unstable measurement [%lld, %lld]\n",
				 __func__,
				 src_mr->name, repr_type(src_type),
				 dst_mr->name, repr_type(dst_type),
				 tests[i].name, size >> 10,
				 t[0], t[4]);
			continue;
		}

		pr_info("%s src(%s, %s) -> dst(%s, %s) %14s %4llu KiB copy: %5lld MiB/s\n",
			__func__,
			src_mr->name, repr_type(src_type),
			dst_mr->name, repr_type(dst_type),
			tests[i].name, size >> 10,
			div64_u64(mul_u32_u32(4 * size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);

		cond_resched();
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_put(dst);
out_unpin_src:
	i915_gem_object_unpin_map(src);
	i915_gem_object_put(src);

	i915_gem_drain_freed_objects(i915);
out:
	if (ret == -ENODEV)
		ret = 0;

	return ret;
}

static int perf_memcpy(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const u32 types[] = {
		I915_MAP_WB,
		I915_MAP_WC,
	};
	static const u32 sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_4M,
	};
	struct intel_memory_region *src_mr, *dst_mr;
	int src_id, dst_id;
	int i, j, k;
	int ret;

	for_each_memory_region(src_mr, i915, src_id) {
		for_each_memory_region(dst_mr, i915, dst_id) {
			for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
				for (j = 0; j < ARRAY_SIZE(types); ++j) {
					for (k = 0; k < ARRAY_SIZE(types); ++k) {
						ret = _perf_memcpy(src_mr,
								   dst_mr,
								   sizes[i],
								   types[j],
								   types[k]);
						if (ret)
							return ret;
					}
				}
			}
		}
	}

	return 0;
}

int intel_memory_region_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_mock_reserve),
		SUBTEST(igt_mock_fill),
		SUBTEST(igt_mock_contiguous),
		SUBTEST(igt_mock_splintered_region),
		SUBTEST(igt_mock_max_segment),
		SUBTEST(igt_mock_io_size),
	};
	struct intel_memory_region *mem;
	struct drm_i915_private *i915;
	int err;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
	if (IS_ERR(mem)) {
		pr_err("failed to create memory region\n");
		err = PTR_ERR(mem);
		goto out_unref;
	}

	err = i915_subtests(tests, mem);

	intel_memory_region_destroy(mem);
out_unref:
	mock_destroy_device(i915);
	return err;
}

int intel_memory_region_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_lmem_create),
		SUBTEST(igt_lmem_create_with_ps),
		SUBTEST(igt_lmem_create_cleared_cpu),
		SUBTEST(igt_lmem_write_cpu),
		SUBTEST(igt_lmem_write_gpu),
	};

	if (!HAS_LMEM(i915)) {
		pr_info("device lacks LMEM support, skipping\n");
		return 0;
	}

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_live_subtests(tests, i915);
}

int intel_memory_region_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_memcpy),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_live_subtests(tests, i915);
}