1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2017 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 #include <linux/string_helpers.h> 9 #include <linux/swap.h> 10 11 #include "i915_selftest.h" 12 13 #include "gem/i915_gem_internal.h" 14 #include "gem/i915_gem_lmem.h" 15 #include "gem/i915_gem_pm.h" 16 #include "gem/i915_gem_region.h" 17 18 #include "gt/intel_gt.h" 19 20 #include "igt_gem_utils.h" 21 #include "mock_context.h" 22 23 #include "selftests/mock_drm.h" 24 #include "selftests/mock_gem_device.h" 25 #include "selftests/mock_region.h" 26 #include "selftests/i915_random.h" 27 28 static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915, 29 struct file *file) 30 { 31 struct i915_gem_context *ctx = live_context(i915, file); 32 struct i915_address_space *vm; 33 34 if (IS_ERR(ctx)) 35 return ctx; 36 37 vm = ctx->vm; 38 if (vm) 39 WRITE_ONCE(vm->scrub_64K, true); 40 41 return ctx; 42 } 43 44 static const unsigned int page_sizes[] = { 45 I915_GTT_PAGE_SIZE_2M, 46 I915_GTT_PAGE_SIZE_64K, 47 I915_GTT_PAGE_SIZE_4K, 48 }; 49 50 static unsigned int get_largest_page_size(struct drm_i915_private *i915, 51 u64 rem) 52 { 53 int i; 54 55 for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { 56 unsigned int page_size = page_sizes[i]; 57 58 if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size) 59 return page_size; 60 } 61 62 return 0; 63 } 64 65 static void huge_pages_free_pages(struct sg_table *st) 66 { 67 struct scatterlist *sg; 68 69 for (sg = st->sgl; sg; sg = __sg_next(sg)) { 70 if (sg_page(sg)) 71 __free_pages(sg_page(sg), get_order(sg->length)); 72 } 73 74 sg_free_table(st); 75 kfree(st); 76 } 77 78 static int get_huge_pages(struct drm_i915_gem_object *obj) 79 { 80 #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) 81 unsigned int page_mask = obj->mm.page_mask; 82 struct sg_table *st; 83 struct scatterlist *sg; 84 unsigned int sg_page_sizes; 85 u64 rem; 86 87 /* restricted by sg_alloc_table */ 88 if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int)) 89 return -E2BIG; 90 91 st = kmalloc(sizeof(*st), GFP); 92 if (!st) 93 return -ENOMEM; 94 95 if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { 96 kfree(st); 97 return -ENOMEM; 98 } 99 100 rem = obj->base.size; 101 sg = st->sgl; 102 st->nents = 0; 103 sg_page_sizes = 0; 104 105 /* 106 * Our goal here is simple, we want to greedily fill the object from 107 * largest to smallest page-size, while ensuring that we use *every* 108 * page-size as per the given page-mask. 
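	 *
	 * For example (illustrative numbers only): with a page_mask of
	 * 2M | 64K | 4K and a 2M + 64K + 4K object, the loop below ends up
	 * with exactly one 2M chunk, one 64K chunk and one 4K chunk, since
	 * each inner loop reserves just enough of the remainder to still use
	 * every smaller page size left in the mask at least once.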
109 */ 110 do { 111 unsigned int bit = ilog2(page_mask); 112 unsigned int page_size = BIT(bit); 113 int order = get_order(page_size); 114 115 do { 116 struct page *page; 117 118 GEM_BUG_ON(order > MAX_PAGE_ORDER); 119 page = alloc_pages(GFP | __GFP_ZERO, order); 120 if (!page) 121 goto err; 122 123 sg_set_page(sg, page, page_size, 0); 124 sg_page_sizes |= page_size; 125 st->nents++; 126 127 rem -= page_size; 128 if (!rem) { 129 sg_mark_end(sg); 130 break; 131 } 132 133 sg = __sg_next(sg); 134 } while ((rem - ((page_size-1) & page_mask)) >= page_size); 135 136 page_mask &= (page_size-1); 137 } while (page_mask); 138 139 if (i915_gem_gtt_prepare_pages(obj, st)) 140 goto err; 141 142 GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); 143 __i915_gem_object_set_pages(obj, st); 144 145 return 0; 146 147 err: 148 sg_set_page(sg, NULL, 0, 0); 149 sg_mark_end(sg); 150 huge_pages_free_pages(st); 151 152 return -ENOMEM; 153 } 154 155 static void put_huge_pages(struct drm_i915_gem_object *obj, 156 struct sg_table *pages) 157 { 158 i915_gem_gtt_finish_pages(obj, pages); 159 huge_pages_free_pages(pages); 160 161 obj->mm.dirty = false; 162 163 __start_cpu_write(obj); 164 } 165 166 static const struct drm_i915_gem_object_ops huge_page_ops = { 167 .name = "huge-gem", 168 .flags = I915_GEM_OBJECT_IS_SHRINKABLE, 169 .get_pages = get_huge_pages, 170 .put_pages = put_huge_pages, 171 }; 172 173 static struct drm_i915_gem_object * 174 huge_pages_object(struct drm_i915_private *i915, 175 u64 size, 176 unsigned int page_mask) 177 { 178 static struct lock_class_key lock_class; 179 struct drm_i915_gem_object *obj; 180 unsigned int cache_level; 181 182 GEM_BUG_ON(!size); 183 GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); 184 185 if (size >> PAGE_SHIFT > INT_MAX) 186 return ERR_PTR(-E2BIG); 187 188 if (overflows_type(size, obj->base.size)) 189 return ERR_PTR(-E2BIG); 190 191 obj = i915_gem_object_alloc(); 192 if (!obj) 193 return ERR_PTR(-ENOMEM); 194 195 drm_gem_private_object_init(&i915->drm, &obj->base, size); 196 i915_gem_object_init(obj, &huge_page_ops, &lock_class, 0); 197 obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; 198 i915_gem_object_set_volatile(obj); 199 200 obj->write_domain = I915_GEM_DOMAIN_CPU; 201 obj->read_domains = I915_GEM_DOMAIN_CPU; 202 203 cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; 204 i915_gem_object_set_cache_coherency(obj, cache_level); 205 206 obj->mm.page_mask = page_mask; 207 208 return obj; 209 } 210 211 static int fake_get_huge_pages(struct drm_i915_gem_object *obj) 212 { 213 struct drm_i915_private *i915 = to_i915(obj->base.dev); 214 const u64 max_len = rounddown_pow_of_two(UINT_MAX); 215 struct sg_table *st; 216 struct scatterlist *sg; 217 u64 rem; 218 219 /* restricted by sg_alloc_table */ 220 if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int)) 221 return -E2BIG; 222 223 st = kmalloc(sizeof(*st), GFP); 224 if (!st) 225 return -ENOMEM; 226 227 if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { 228 kfree(st); 229 return -ENOMEM; 230 } 231 232 /* Use optimal page sized chunks to fill in the sg table */ 233 rem = obj->base.size; 234 sg = st->sgl; 235 st->nents = 0; 236 do { 237 unsigned int page_size = get_largest_page_size(i915, rem); 238 unsigned int len = min(page_size * div_u64(rem, page_size), 239 max_len); 240 241 GEM_BUG_ON(!page_size); 242 243 sg->offset = 0; 244 sg->length = len; 245 sg_dma_len(sg) = len; 246 sg_dma_address(sg) = page_size; 247 248 st->nents++; 249 250 rem -= len; 251 if (!rem) { 252 sg_mark_end(sg); 253 break; 254 } 255 256 sg = sg_next(sg); 257 } while (1); 258 259 i915_sg_trim(st); 260 261 __i915_gem_object_set_pages(obj, st); 262 263 return 0; 264 } 265 266 static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) 267 { 268 struct drm_i915_private *i915 = to_i915(obj->base.dev); 269 struct sg_table *st; 270 struct scatterlist *sg; 271 unsigned int page_size; 272 273 st = kmalloc(sizeof(*st), GFP); 274 if (!st) 275 return -ENOMEM; 276 277 if (sg_alloc_table(st, 1, GFP)) { 278 kfree(st); 279 return -ENOMEM; 280 } 281 282 sg = st->sgl; 283 st->nents = 1; 284 285 page_size = get_largest_page_size(i915, obj->base.size); 286 GEM_BUG_ON(!page_size); 287 288 sg->offset = 0; 289 sg->length = obj->base.size; 290 sg_dma_len(sg) = obj->base.size; 291 sg_dma_address(sg) = page_size; 292 293 __i915_gem_object_set_pages(obj, st); 294 295 return 0; 296 #undef GFP 297 } 298 299 static void fake_free_huge_pages(struct drm_i915_gem_object *obj, 300 struct sg_table *pages) 301 { 302 sg_free_table(pages); 303 kfree(pages); 304 } 305 306 static void fake_put_huge_pages(struct drm_i915_gem_object *obj, 307 struct sg_table *pages) 308 { 309 fake_free_huge_pages(obj, pages); 310 obj->mm.dirty = false; 311 } 312 313 static const struct drm_i915_gem_object_ops fake_ops = { 314 .name = "fake-gem", 315 .flags = I915_GEM_OBJECT_IS_SHRINKABLE, 316 .get_pages = fake_get_huge_pages, 317 .put_pages = fake_put_huge_pages, 318 }; 319 320 static const struct drm_i915_gem_object_ops fake_ops_single = { 321 .name = "fake-gem", 322 .flags = I915_GEM_OBJECT_IS_SHRINKABLE, 323 .get_pages = fake_get_huge_pages_single, 324 .put_pages = fake_put_huge_pages, 325 }; 326 327 static struct drm_i915_gem_object * 328 fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) 329 { 330 static struct lock_class_key lock_class; 331 struct drm_i915_gem_object *obj; 332 333 GEM_BUG_ON(!size); 334 GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); 335 336 if (size >> PAGE_SHIFT > UINT_MAX) 337 return ERR_PTR(-E2BIG); 338 339 if (overflows_type(size, obj->base.size)) 340 return ERR_PTR(-E2BIG); 341 342 obj = i915_gem_object_alloc(); 343 if (!obj) 344 return ERR_PTR(-ENOMEM); 345 346 drm_gem_private_object_init(&i915->drm, &obj->base, size); 347 348 if (single) 349 i915_gem_object_init(obj, 
&fake_ops_single, &lock_class, 0); 350 else 351 i915_gem_object_init(obj, &fake_ops, &lock_class, 0); 352 353 i915_gem_object_set_volatile(obj); 354 355 obj->write_domain = I915_GEM_DOMAIN_CPU; 356 obj->read_domains = I915_GEM_DOMAIN_CPU; 357 obj->pat_index = i915_gem_get_pat_index(i915, I915_CACHE_NONE); 358 359 return obj; 360 } 361 362 static int igt_check_page_sizes(struct i915_vma *vma) 363 { 364 struct drm_i915_private *i915 = vma->vm->i915; 365 unsigned int supported = RUNTIME_INFO(i915)->page_sizes; 366 struct drm_i915_gem_object *obj = vma->obj; 367 int err; 368 369 /* We have to wait for the async bind to complete before our asserts */ 370 err = i915_vma_sync(vma); 371 if (err) 372 return err; 373 374 if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { 375 pr_err("unsupported page_sizes.sg=%u, supported=%u\n", 376 vma->page_sizes.sg & ~supported, supported); 377 err = -EINVAL; 378 } 379 380 if (!HAS_PAGE_SIZES(i915, vma->resource->page_sizes_gtt)) { 381 pr_err("unsupported page_sizes.gtt=%u, supported=%u\n", 382 vma->resource->page_sizes_gtt & ~supported, supported); 383 err = -EINVAL; 384 } 385 386 if (vma->page_sizes.phys != obj->mm.page_sizes.phys) { 387 pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n", 388 vma->page_sizes.phys, obj->mm.page_sizes.phys); 389 err = -EINVAL; 390 } 391 392 if (vma->page_sizes.sg != obj->mm.page_sizes.sg) { 393 pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n", 394 vma->page_sizes.sg, obj->mm.page_sizes.sg); 395 err = -EINVAL; 396 } 397 398 /* 399 * The dma-api is like a box of chocolates when it comes to the 400 * alignment of dma addresses, however for LMEM we have total control 401 * and so can guarantee alignment, likewise when we allocate our blocks 402 * they should appear in descending order, and if we know that we align 403 * to the largest page size for the GTT address, we should be able to 404 * assert that if we see 2M physical pages then we should also get 2M 405 * GTT pages. If we don't then something might be wrong in our 406 * construction of the backing pages. 407 * 408 * Maintaining alignment is required to utilise huge pages in the ppGGT. 409 */ 410 if (i915_gem_object_is_lmem(obj) && 411 IS_ALIGNED(i915_vma_offset(vma), SZ_2M) && 412 vma->page_sizes.sg & SZ_2M && 413 vma->resource->page_sizes_gtt < SZ_2M) { 414 pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n", 415 vma->page_sizes.sg, vma->resource->page_sizes_gtt); 416 err = -EINVAL; 417 } 418 419 return err; 420 } 421 422 static int igt_mock_exhaust_device_supported_pages(void *arg) 423 { 424 struct i915_ppgtt *ppgtt = arg; 425 struct drm_i915_private *i915 = ppgtt->vm.i915; 426 unsigned int saved_mask = RUNTIME_INFO(i915)->page_sizes; 427 struct drm_i915_gem_object *obj; 428 struct i915_vma *vma; 429 int i, j, single; 430 int err; 431 432 /* 433 * Sanity check creating objects with every valid page support 434 * combination for our mock device. 
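	 *
	 * Each value of i below selects a non-empty subset of page_sizes[],
	 * which is then OR'ed with SZ_4K since the ppGTT always needs 4K
	 * support, e.g. i = 0b101 gives a 2M | 4K combination.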
435 */ 436 437 for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { 438 unsigned int combination = SZ_4K; /* Required for ppGTT */ 439 440 for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { 441 if (i & BIT(j)) 442 combination |= page_sizes[j]; 443 } 444 445 RUNTIME_INFO(i915)->page_sizes = combination; 446 447 for (single = 0; single <= 1; ++single) { 448 obj = fake_huge_pages_object(i915, combination, !!single); 449 if (IS_ERR(obj)) { 450 err = PTR_ERR(obj); 451 goto out_device; 452 } 453 454 if (obj->base.size != combination) { 455 pr_err("obj->base.size=%zu, expected=%u\n", 456 obj->base.size, combination); 457 err = -EINVAL; 458 goto out_put; 459 } 460 461 vma = i915_vma_instance(obj, &ppgtt->vm, NULL); 462 if (IS_ERR(vma)) { 463 err = PTR_ERR(vma); 464 goto out_put; 465 } 466 467 err = i915_vma_pin(vma, 0, 0, PIN_USER); 468 if (err) 469 goto out_put; 470 471 err = igt_check_page_sizes(vma); 472 473 if (vma->page_sizes.sg != combination) { 474 pr_err("page_sizes.sg=%u, expected=%u\n", 475 vma->page_sizes.sg, combination); 476 err = -EINVAL; 477 } 478 479 i915_vma_unpin(vma); 480 i915_gem_object_put(obj); 481 482 if (err) 483 goto out_device; 484 } 485 } 486 487 goto out_device; 488 489 out_put: 490 i915_gem_object_put(obj); 491 out_device: 492 RUNTIME_INFO(i915)->page_sizes = saved_mask; 493 494 return err; 495 } 496 497 static int igt_mock_memory_region_huge_pages(void *arg) 498 { 499 const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS }; 500 struct i915_ppgtt *ppgtt = arg; 501 struct drm_i915_private *i915 = ppgtt->vm.i915; 502 unsigned long supported = RUNTIME_INFO(i915)->page_sizes; 503 struct intel_memory_region *mem; 504 struct drm_i915_gem_object *obj; 505 struct i915_vma *vma; 506 int bit; 507 int err = 0; 508 509 mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0); 510 if (IS_ERR(mem)) { 511 pr_err("%s failed to create memory region\n", __func__); 512 return PTR_ERR(mem); 513 } 514 515 for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 516 unsigned int page_size = BIT(bit); 517 resource_size_t phys; 518 int i; 519 520 for (i = 0; i < ARRAY_SIZE(flags); ++i) { 521 obj = i915_gem_object_create_region(mem, 522 page_size, page_size, 523 flags[i]); 524 if (IS_ERR(obj)) { 525 err = PTR_ERR(obj); 526 goto out_region; 527 } 528 529 vma = i915_vma_instance(obj, &ppgtt->vm, NULL); 530 if (IS_ERR(vma)) { 531 err = PTR_ERR(vma); 532 goto out_put; 533 } 534 535 err = i915_vma_pin(vma, 0, 0, PIN_USER); 536 if (err) 537 goto out_put; 538 539 err = igt_check_page_sizes(vma); 540 if (err) 541 goto out_unpin; 542 543 phys = i915_gem_object_get_dma_address(obj, 0); 544 if (!IS_ALIGNED(phys, page_size)) { 545 pr_err("%s addr misaligned(%pa) page_size=%u\n", 546 __func__, &phys, page_size); 547 err = -EINVAL; 548 goto out_unpin; 549 } 550 551 if (vma->resource->page_sizes_gtt != page_size) { 552 pr_err("%s page_sizes.gtt=%u, expected=%u\n", 553 __func__, vma->resource->page_sizes_gtt, 554 page_size); 555 err = -EINVAL; 556 goto out_unpin; 557 } 558 559 i915_vma_unpin(vma); 560 __i915_gem_object_put_pages(obj); 561 i915_gem_object_put(obj); 562 } 563 } 564 565 goto out_region; 566 567 out_unpin: 568 i915_vma_unpin(vma); 569 out_put: 570 i915_gem_object_put(obj); 571 out_region: 572 intel_memory_region_destroy(mem); 573 return err; 574 } 575 576 static int igt_mock_ppgtt_misaligned_dma(void *arg) 577 { 578 struct i915_ppgtt *ppgtt = arg; 579 struct drm_i915_private *i915 = ppgtt->vm.i915; 580 unsigned long supported = RUNTIME_INFO(i915)->page_sizes; 581 struct 
drm_i915_gem_object *obj; 582 int bit; 583 int err; 584 585 /* 586 * Sanity check dma misalignment for huge pages -- the dma addresses we 587 * insert into the paging structures need to always respect the page 588 * size alignment. 589 */ 590 591 bit = ilog2(I915_GTT_PAGE_SIZE_64K); 592 593 for_each_set_bit_from(bit, &supported, 594 ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 595 IGT_TIMEOUT(end_time); 596 unsigned int page_size = BIT(bit); 597 unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; 598 unsigned int offset; 599 unsigned int size = 600 round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1; 601 struct i915_vma *vma; 602 603 obj = fake_huge_pages_object(i915, size, true); 604 if (IS_ERR(obj)) 605 return PTR_ERR(obj); 606 607 if (obj->base.size != size) { 608 pr_err("obj->base.size=%zu, expected=%u\n", 609 obj->base.size, size); 610 err = -EINVAL; 611 goto out_put; 612 } 613 614 err = i915_gem_object_pin_pages_unlocked(obj); 615 if (err) 616 goto out_put; 617 618 /* Force the page size for this object */ 619 obj->mm.page_sizes.sg = page_size; 620 621 vma = i915_vma_instance(obj, &ppgtt->vm, NULL); 622 if (IS_ERR(vma)) { 623 err = PTR_ERR(vma); 624 goto out_unpin; 625 } 626 627 err = i915_vma_pin(vma, 0, 0, flags); 628 if (err) 629 goto out_unpin; 630 631 632 err = igt_check_page_sizes(vma); 633 634 if (vma->resource->page_sizes_gtt != page_size) { 635 pr_err("page_sizes.gtt=%u, expected %u\n", 636 vma->resource->page_sizes_gtt, page_size); 637 err = -EINVAL; 638 } 639 640 i915_vma_unpin(vma); 641 642 if (err) 643 goto out_unpin; 644 645 /* 646 * Try all the other valid offsets until the next 647 * boundary -- should always fall back to using 4K 648 * pages. 649 */ 650 for (offset = 4096; offset < page_size; offset += 4096) { 651 err = i915_vma_unbind_unlocked(vma); 652 if (err) 653 goto out_unpin; 654 655 err = i915_vma_pin(vma, 0, 0, flags | offset); 656 if (err) 657 goto out_unpin; 658 659 err = igt_check_page_sizes(vma); 660 661 if (vma->resource->page_sizes_gtt != I915_GTT_PAGE_SIZE_4K) { 662 pr_err("page_sizes.gtt=%u, expected %llu\n", 663 vma->resource->page_sizes_gtt, 664 I915_GTT_PAGE_SIZE_4K); 665 err = -EINVAL; 666 } 667 668 i915_vma_unpin(vma); 669 670 if (err) 671 goto out_unpin; 672 673 if (igt_timeout(end_time, 674 "%s timed out at offset %x with page-size %x\n", 675 __func__, offset, page_size)) 676 break; 677 } 678 679 i915_gem_object_lock(obj, NULL); 680 i915_gem_object_unpin_pages(obj); 681 __i915_gem_object_put_pages(obj); 682 i915_gem_object_unlock(obj); 683 i915_gem_object_put(obj); 684 } 685 686 return 0; 687 688 out_unpin: 689 i915_gem_object_lock(obj, NULL); 690 i915_gem_object_unpin_pages(obj); 691 i915_gem_object_unlock(obj); 692 out_put: 693 i915_gem_object_put(obj); 694 695 return err; 696 } 697 698 static void close_object_list(struct list_head *objects) 699 { 700 struct drm_i915_gem_object *obj, *on; 701 702 list_for_each_entry_safe(obj, on, objects, st_link) { 703 list_del(&obj->st_link); 704 i915_gem_object_lock(obj, NULL); 705 i915_gem_object_unpin_pages(obj); 706 __i915_gem_object_put_pages(obj); 707 i915_gem_object_unlock(obj); 708 i915_gem_object_put(obj); 709 } 710 } 711 712 static int igt_ppgtt_huge_fill(void *arg) 713 { 714 struct drm_i915_private *i915 = arg; 715 unsigned int supported = RUNTIME_INFO(i915)->page_sizes; 716 bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55); 717 struct i915_address_space *vm; 718 struct i915_gem_context *ctx; 719 unsigned long max_pages; 720 unsigned long page_num; 721 struct file *file; 722 bool single = false; 723 
LIST_HEAD(objects); 724 IGT_TIMEOUT(end_time); 725 int err = -ENODEV; 726 727 if (supported == I915_GTT_PAGE_SIZE_4K) 728 return 0; 729 730 file = mock_file(i915); 731 if (IS_ERR(file)) 732 return PTR_ERR(file); 733 734 ctx = hugepage_ctx(i915, file); 735 if (IS_ERR(ctx)) { 736 err = PTR_ERR(ctx); 737 goto out; 738 } 739 vm = i915_gem_context_get_eb_vm(ctx); 740 max_pages = vm->total >> PAGE_SHIFT; 741 742 for_each_prime_number_from(page_num, 1, max_pages) { 743 struct drm_i915_gem_object *obj; 744 u64 size = page_num << PAGE_SHIFT; 745 struct i915_vma *vma; 746 unsigned int expected_gtt = 0; 747 int i; 748 749 obj = fake_huge_pages_object(i915, size, single); 750 if (IS_ERR(obj)) { 751 err = PTR_ERR(obj); 752 break; 753 } 754 755 if (obj->base.size != size) { 756 pr_err("obj->base.size=%zd, expected=%llu\n", 757 obj->base.size, size); 758 i915_gem_object_put(obj); 759 err = -EINVAL; 760 break; 761 } 762 763 err = i915_gem_object_pin_pages_unlocked(obj); 764 if (err) { 765 i915_gem_object_put(obj); 766 break; 767 } 768 769 list_add(&obj->st_link, &objects); 770 771 vma = i915_vma_instance(obj, vm, NULL); 772 if (IS_ERR(vma)) { 773 err = PTR_ERR(vma); 774 break; 775 } 776 777 /* vma start must be aligned to BIT(21) to allow 2M PTEs */ 778 err = i915_vma_pin(vma, 0, BIT(21), PIN_USER); 779 if (err) 780 break; 781 782 err = igt_check_page_sizes(vma); 783 if (err) { 784 i915_vma_unpin(vma); 785 break; 786 } 787 788 /* 789 * Figure out the expected gtt page size knowing that we go from 790 * largest to smallest page size sg chunks, and that we align to 791 * the largest page size. 792 */ 793 for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { 794 unsigned int page_size = page_sizes[i]; 795 796 if (HAS_PAGE_SIZES(i915, page_size) && 797 size >= page_size) { 798 expected_gtt |= page_size; 799 size &= page_size-1; 800 } 801 } 802 803 GEM_BUG_ON(!expected_gtt); 804 GEM_BUG_ON(size); 805 806 if (!has_pte64 && (obj->base.size < I915_GTT_PAGE_SIZE_2M || 807 expected_gtt & I915_GTT_PAGE_SIZE_2M)) 808 expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; 809 810 i915_vma_unpin(vma); 811 812 if (!has_pte64 && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { 813 if (!IS_ALIGNED(vma->node.start, 814 I915_GTT_PAGE_SIZE_2M)) { 815 pr_err("node.start(%llx) not aligned to 2M\n", 816 vma->node.start); 817 err = -EINVAL; 818 break; 819 } 820 821 if (!IS_ALIGNED(vma->node.size, 822 I915_GTT_PAGE_SIZE_2M)) { 823 pr_err("node.size(%llx) not aligned to 2M\n", 824 vma->node.size); 825 err = -EINVAL; 826 break; 827 } 828 } 829 830 if (vma->resource->page_sizes_gtt != expected_gtt) { 831 pr_err("gtt=%#x, expected=%#x, size=0x%zx, single=%s\n", 832 vma->resource->page_sizes_gtt, expected_gtt, 833 obj->base.size, str_yes_no(!!single)); 834 err = -EINVAL; 835 break; 836 } 837 838 if (igt_timeout(end_time, 839 "%s timed out at size %zd\n", 840 __func__, obj->base.size)) 841 break; 842 843 single = !single; 844 } 845 846 close_object_list(&objects); 847 848 if (err == -ENOMEM || err == -ENOSPC) 849 err = 0; 850 851 i915_vm_put(vm); 852 out: 853 fput(file); 854 return err; 855 } 856 857 static int igt_ppgtt_64K(void *arg) 858 { 859 struct drm_i915_private *i915 = arg; 860 bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55); 861 struct drm_i915_gem_object *obj; 862 struct i915_address_space *vm; 863 struct i915_gem_context *ctx; 864 struct file *file; 865 const struct object_info { 866 unsigned int size; 867 unsigned int gtt; 868 unsigned int offset; 869 } objects[] = { 870 /* Cases with forced padding/alignment */ 871 { 872 .size = 
SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_64K + SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_64K - SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_2M - SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M + SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M + SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_2M - SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		/* Try without any forced padding/alignment */
		{
			.size = SZ_64K,
			.offset = SZ_2M,
			.gtt = I915_GTT_PAGE_SIZE_4K,
		},
		{
			.size = SZ_128K,
			.offset = SZ_2M - SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
		},
	};
	struct i915_vma *vma;
	int i, single;
	int err;

	/*
	 * Sanity check some of the trickiness with 64K pages -- either we can
	 * safely mark the whole page-table (2M block) as 64K, or we have to
	 * always fall back to 4K.
	 */

	if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = hugepage_ctx(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}
	vm = i915_gem_context_get_eb_vm(ctx);

	for (i = 0; i < ARRAY_SIZE(objects); ++i) {
		unsigned int size = objects[i].size;
		unsigned int expected_gtt = objects[i].gtt;
		unsigned int offset = objects[i].offset;
		unsigned int flags = PIN_USER;

		/*
		 * For modern GTT models, the requirements for marking a page-table
		 * as 64K have been relaxed. Account for this.
		 */
		if (has_pte64) {
			expected_gtt = 0;
			if (size >= SZ_64K)
				expected_gtt |= I915_GTT_PAGE_SIZE_64K;
			if (size & (SZ_64K - 1))
				expected_gtt |= I915_GTT_PAGE_SIZE_4K;
		}

		for (single = 0; single <= 1; single++) {
			obj = fake_huge_pages_object(i915, size, !!single);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				goto out_vm;
			}

			err = i915_gem_object_pin_pages_unlocked(obj);
			if (err)
				goto out_object_put;

			/*
			 * Disable 2M pages -- we only want to use 64K/4K pages
			 * for this test.
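			 *
			 * Note we simply mask the bit out of the sg page sizes
			 * computed at pin time; the backing store itself is
			 * untouched, we just hide 2M from the GTT insertion.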
979 */ 980 obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; 981 982 vma = i915_vma_instance(obj, vm, NULL); 983 if (IS_ERR(vma)) { 984 err = PTR_ERR(vma); 985 goto out_object_unpin; 986 } 987 988 if (offset) 989 flags |= PIN_OFFSET_FIXED | offset; 990 991 err = i915_vma_pin(vma, 0, 0, flags); 992 if (err) 993 goto out_object_unpin; 994 995 err = igt_check_page_sizes(vma); 996 if (err) 997 goto out_vma_unpin; 998 999 if (!has_pte64 && !offset && 1000 vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { 1001 if (!IS_ALIGNED(vma->node.start, 1002 I915_GTT_PAGE_SIZE_2M)) { 1003 pr_err("node.start(%llx) not aligned to 2M\n", 1004 vma->node.start); 1005 err = -EINVAL; 1006 goto out_vma_unpin; 1007 } 1008 1009 if (!IS_ALIGNED(vma->node.size, 1010 I915_GTT_PAGE_SIZE_2M)) { 1011 pr_err("node.size(%llx) not aligned to 2M\n", 1012 vma->node.size); 1013 err = -EINVAL; 1014 goto out_vma_unpin; 1015 } 1016 } 1017 1018 if (vma->resource->page_sizes_gtt != expected_gtt) { 1019 pr_err("gtt=%#x, expected=%#x, i=%d, single=%s offset=%#x size=%#x\n", 1020 vma->resource->page_sizes_gtt, 1021 expected_gtt, i, str_yes_no(!!single), 1022 offset, size); 1023 err = -EINVAL; 1024 goto out_vma_unpin; 1025 } 1026 1027 i915_vma_unpin(vma); 1028 i915_gem_object_lock(obj, NULL); 1029 i915_gem_object_unpin_pages(obj); 1030 __i915_gem_object_put_pages(obj); 1031 i915_gem_object_unlock(obj); 1032 i915_gem_object_put(obj); 1033 1034 i915_gem_drain_freed_objects(i915); 1035 } 1036 } 1037 1038 goto out_vm; 1039 1040 out_vma_unpin: 1041 i915_vma_unpin(vma); 1042 out_object_unpin: 1043 i915_gem_object_lock(obj, NULL); 1044 i915_gem_object_unpin_pages(obj); 1045 i915_gem_object_unlock(obj); 1046 out_object_put: 1047 i915_gem_object_put(obj); 1048 out_vm: 1049 i915_vm_put(vm); 1050 out: 1051 fput(file); 1052 return err; 1053 } 1054 1055 static int gpu_write(struct intel_context *ce, 1056 struct i915_vma *vma, 1057 u32 dw, 1058 u32 val) 1059 { 1060 int err; 1061 1062 i915_gem_object_lock(vma->obj, NULL); 1063 err = i915_gem_object_set_to_gtt_domain(vma->obj, true); 1064 i915_gem_object_unlock(vma->obj); 1065 if (err) 1066 return err; 1067 1068 return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), 1069 vma->size >> PAGE_SHIFT, val); 1070 } 1071 1072 static int 1073 __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) 1074 { 1075 unsigned int needs_flush; 1076 unsigned long n; 1077 int err; 1078 1079 i915_gem_object_lock(obj, NULL); 1080 err = i915_gem_object_prepare_read(obj, &needs_flush); 1081 if (err) 1082 goto err_unlock; 1083 1084 for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { 1085 u32 *ptr = kmap_local_page(i915_gem_object_get_page(obj, n)); 1086 1087 if (needs_flush & CLFLUSH_BEFORE) 1088 drm_clflush_virt_range(ptr, PAGE_SIZE); 1089 1090 if (ptr[dword] != val) { 1091 pr_err("n=%lu ptr[%u]=%u, val=%u\n", 1092 n, dword, ptr[dword], val); 1093 kunmap_local(ptr); 1094 err = -EINVAL; 1095 break; 1096 } 1097 1098 kunmap_local(ptr); 1099 } 1100 1101 i915_gem_object_finish_access(obj); 1102 err_unlock: 1103 i915_gem_object_unlock(obj); 1104 1105 return err; 1106 } 1107 1108 static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val) 1109 { 1110 unsigned long n = obj->base.size >> PAGE_SHIFT; 1111 u32 *ptr; 1112 int err; 1113 1114 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1115 if (err) 1116 return err; 1117 1118 ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); 1119 if (IS_ERR(ptr)) 1120 return PTR_ERR(ptr); 1121 1122 ptr += dword; 1123 while (n--) { 1124 if (*ptr != val) { 
pr_err("base[%u]=%08x, val=%08x\n",
			       dword, *ptr, val);
			err = -EINVAL;
			break;
		}

		ptr += PAGE_SIZE / sizeof(*ptr);
	}

	i915_gem_object_unpin_map(obj);
	return err;
}

static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
	if (i915_gem_object_has_struct_page(obj))
		return __cpu_check_shmem(obj, dword, val);
	else
		return __cpu_check_vmap(obj, dword, val);
}

static int __igt_write_huge(struct intel_context *ce,
			    struct drm_i915_gem_object *obj,
			    u64 size, u64 offset,
			    u32 dword, u32 val)
{
	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, size, 0, flags | offset);
	if (err) {
		/*
		 * The ggtt may have some pages reserved so
		 * refrain from erroring out.
		 */
		if (err == -ENOSPC && i915_is_ggtt(ce->vm))
			err = 0;

		return err;
	}

	err = igt_check_page_sizes(vma);
	if (err)
		goto out_vma_unpin;

	err = gpu_write(ce, vma, dword, val);
	if (err) {
		pr_err("gpu-write failed at offset=%llx\n", offset);
		goto out_vma_unpin;
	}

	err = cpu_check(obj, dword, val);
	if (err) {
		pr_err("cpu-check failed at offset=%llx\n", offset);
		goto out_vma_unpin;
	}

out_vma_unpin:
	i915_vma_unpin(vma);
	return err;
}

static int igt_write_huge(struct drm_i915_private *i915,
			  struct drm_i915_gem_object *obj)
{
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	struct intel_context *ce;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	unsigned int max_page_size;
	unsigned int count;
	struct i915_gem_context *ctx;
	struct file *file;
	u64 max;
	u64 num;
	u64 size;
	int *order;
	int i, n;
	int err = 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = hugepage_ctx(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	size = obj->base.size;
	if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
	    !HAS_64K_PAGES(i915))
		size = round_up(size, I915_GTT_PAGE_SIZE_2M);

	n = 0;
	count = 0;
	max = U64_MAX;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		count++;
		if (!intel_engine_can_store_dword(ce->engine))
			continue;

		max = min(max, ce->vm->total);
		n++;
	}
	i915_gem_context_unlock_engines(ctx);
	if (!n)
		goto out;

	/*
	 * To keep things interesting when alternating between engines in our
	 * randomized order, let's also make feeding to the same engine a few
	 * times in succession a possibility by enlarging the permutation array.
	 */
	order = i915_random_order(count * count, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out;
	}

	max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
	max = div_u64(max - size, max_page_size);

	/*
	 * Try various offsets in an ascending/descending fashion until we
	 * time out -- we want to avoid issues hidden by effectively always
	 * using offset = 0.
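	 *
	 * For each prime 'num' we exercise both ends of the address space:
	 * offset_low = num * max_page_size walks up from the bottom, while
	 * offset_high = (max - num) * max_page_size walks down from the top.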
1261 */ 1262 i = 0; 1263 engines = i915_gem_context_lock_engines(ctx); 1264 for_each_prime_number_from(num, 0, max) { 1265 u64 offset_low = num * max_page_size; 1266 u64 offset_high = (max - num) * max_page_size; 1267 u32 dword = offset_in_page(num) / 4; 1268 struct intel_context *ce; 1269 1270 ce = engines->engines[order[i] % engines->num_engines]; 1271 i = (i + 1) % (count * count); 1272 if (!ce || !intel_engine_can_store_dword(ce->engine)) 1273 continue; 1274 1275 /* 1276 * In order to utilize 64K pages we need to both pad the vma 1277 * size and ensure the vma offset is at the start of the pt 1278 * boundary, however to improve coverage we opt for testing both 1279 * aligned and unaligned offsets. 1280 * 1281 * With PS64 this is no longer the case, but to ensure we 1282 * sometimes get the compact layout for smaller objects, apply 1283 * the round_up anyway. 1284 */ 1285 if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) 1286 offset_low = round_down(offset_low, 1287 I915_GTT_PAGE_SIZE_2M); 1288 1289 err = __igt_write_huge(ce, obj, size, offset_low, 1290 dword, num + 1); 1291 if (err) 1292 break; 1293 1294 err = __igt_write_huge(ce, obj, size, offset_high, 1295 dword, num + 1); 1296 if (err) 1297 break; 1298 1299 if (igt_timeout(end_time, 1300 "%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n", 1301 __func__, ce->engine->name, offset_low, offset_high, 1302 max_page_size)) 1303 break; 1304 } 1305 i915_gem_context_unlock_engines(ctx); 1306 1307 kfree(order); 1308 1309 out: 1310 fput(file); 1311 return err; 1312 } 1313 1314 typedef struct drm_i915_gem_object * 1315 (*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags); 1316 1317 static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) 1318 { 1319 return i915->mm.gemfs && has_transparent_hugepage(); 1320 } 1321 1322 static struct drm_i915_gem_object * 1323 igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags) 1324 { 1325 if (!igt_can_allocate_thp(i915)) { 1326 pr_info("%s missing THP support, skipping\n", __func__); 1327 return ERR_PTR(-ENODEV); 1328 } 1329 1330 return i915_gem_object_create_shmem(i915, size); 1331 } 1332 1333 static struct drm_i915_gem_object * 1334 igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags) 1335 { 1336 return i915_gem_object_create_internal(i915, size); 1337 } 1338 1339 static struct drm_i915_gem_object * 1340 igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags) 1341 { 1342 return huge_pages_object(i915, size, size); 1343 } 1344 1345 static struct drm_i915_gem_object * 1346 igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags) 1347 { 1348 return i915_gem_object_create_lmem(i915, size, flags); 1349 } 1350 1351 static u32 igt_random_size(struct rnd_state *prng, 1352 u32 min_page_size, 1353 u32 max_page_size) 1354 { 1355 u64 mask; 1356 u32 size; 1357 1358 GEM_BUG_ON(!is_power_of_2(min_page_size)); 1359 GEM_BUG_ON(!is_power_of_2(max_page_size)); 1360 GEM_BUG_ON(min_page_size < PAGE_SIZE); 1361 GEM_BUG_ON(min_page_size > max_page_size); 1362 1363 mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK; 1364 size = prandom_u32_state(prng) & mask; 1365 if (size < min_page_size) 1366 size |= min_page_size; 1367 1368 return size; 1369 } 1370 1371 static int igt_ppgtt_smoke_huge(void *arg) 1372 { 1373 struct drm_i915_private *i915 = arg; 1374 struct drm_i915_gem_object *obj; 1375 I915_RND_STATE(prng); 1376 struct { 1377 igt_create_fn fn; 1378 u32 min; 1379 u32 max; 1380 } backends[] = { 1381 { 
igt_create_internal, SZ_64K, SZ_2M, }, 1382 { igt_create_shmem, SZ_64K, SZ_32M, }, 1383 { igt_create_local, SZ_64K, SZ_1G, }, 1384 }; 1385 int err; 1386 int i; 1387 1388 /* 1389 * Sanity check that the HW uses huge pages correctly through our 1390 * various backends -- ensure that our writes land in the right place. 1391 */ 1392 1393 for (i = 0; i < ARRAY_SIZE(backends); ++i) { 1394 u32 min = backends[i].min; 1395 u32 max = backends[i].max; 1396 u32 size = max; 1397 1398 try_again: 1399 size = igt_random_size(&prng, min, rounddown_pow_of_two(size)); 1400 1401 obj = backends[i].fn(i915, size, 0); 1402 if (IS_ERR(obj)) { 1403 err = PTR_ERR(obj); 1404 if (err == -E2BIG) { 1405 size >>= 1; 1406 goto try_again; 1407 } else if (err == -ENODEV) { 1408 err = 0; 1409 continue; 1410 } 1411 1412 return err; 1413 } 1414 1415 err = i915_gem_object_pin_pages_unlocked(obj); 1416 if (err) { 1417 if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) { 1418 i915_gem_object_put(obj); 1419 size >>= 1; 1420 goto try_again; 1421 } 1422 goto out_put; 1423 } 1424 1425 if (obj->mm.page_sizes.phys < min) { 1426 pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n", 1427 __func__, size, i); 1428 err = -ENOMEM; 1429 goto out_unpin; 1430 } 1431 1432 err = igt_write_huge(i915, obj); 1433 if (err) { 1434 pr_err("%s write-huge failed with size=%u, i=%d\n", 1435 __func__, size, i); 1436 } 1437 out_unpin: 1438 i915_gem_object_lock(obj, NULL); 1439 i915_gem_object_unpin_pages(obj); 1440 __i915_gem_object_put_pages(obj); 1441 i915_gem_object_unlock(obj); 1442 out_put: 1443 i915_gem_object_put(obj); 1444 1445 if (err == -ENOMEM || err == -ENXIO) 1446 err = 0; 1447 1448 if (err) 1449 break; 1450 1451 cond_resched(); 1452 } 1453 1454 return err; 1455 } 1456 1457 static int igt_ppgtt_sanity_check(void *arg) 1458 { 1459 struct drm_i915_private *i915 = arg; 1460 unsigned int supported = RUNTIME_INFO(i915)->page_sizes; 1461 struct { 1462 igt_create_fn fn; 1463 unsigned int flags; 1464 } backends[] = { 1465 { igt_create_system, 0, }, 1466 { igt_create_local, 0, }, 1467 { igt_create_local, I915_BO_ALLOC_CONTIGUOUS, }, 1468 }; 1469 struct { 1470 u32 size; 1471 u32 pages; 1472 } combos[] = { 1473 { SZ_64K, SZ_64K }, 1474 { SZ_2M, SZ_2M }, 1475 { SZ_2M, SZ_64K }, 1476 { SZ_2M - SZ_64K, SZ_64K }, 1477 { SZ_2M - SZ_4K, SZ_64K | SZ_4K }, 1478 { SZ_2M + SZ_4K, SZ_64K | SZ_4K }, 1479 { SZ_2M + SZ_4K, SZ_2M | SZ_4K }, 1480 { SZ_2M + SZ_64K, SZ_2M | SZ_64K }, 1481 { SZ_2M + SZ_64K, SZ_64K }, 1482 }; 1483 int i, j; 1484 int err; 1485 1486 if (supported == I915_GTT_PAGE_SIZE_4K) 1487 return 0; 1488 1489 /* 1490 * Sanity check that the HW behaves with a limited set of combinations. 1491 * We already have a bunch of randomised testing, which should give us 1492 * a decent amount of variation between runs, however we should keep 1493 * this to limit the chances of introducing a temporary regression, by 1494 * testing the most obvious cases that might make something blow up. 
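	 *
	 * Each combo pairs an object size with a sg page-size mask that we
	 * force onto the object before binding, e.g. { SZ_2M + SZ_64K, SZ_64K }
	 * makes a 2M + 64K object pretend it only has 64K chunks, so the GTT
	 * insertion path for that layout gets exercised directly.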
	 */

	for (i = 0; i < ARRAY_SIZE(backends); ++i) {
		for (j = 0; j < ARRAY_SIZE(combos); ++j) {
			struct drm_i915_gem_object *obj;
			u32 size = combos[j].size;
			u32 pages = combos[j].pages;

			obj = backends[i].fn(i915, size, backends[i].flags);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				if (err == -ENODEV) {
					pr_info("Device lacks local memory, skipping\n");
					err = 0;
					break;
				}

				return err;
			}

			err = i915_gem_object_pin_pages_unlocked(obj);
			if (err) {
				i915_gem_object_put(obj);
				goto out;
			}

			GEM_BUG_ON(pages > obj->base.size);
			pages = pages & supported;

			if (pages)
				obj->mm.page_sizes.sg = pages;

			err = igt_write_huge(i915, obj);

			i915_gem_object_lock(obj, NULL);
			i915_gem_object_unpin_pages(obj);
			__i915_gem_object_put_pages(obj);
			i915_gem_object_unlock(obj);
			i915_gem_object_put(obj);

			if (err) {
				pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n",
				       __func__, size, pages, i, j);
				goto out;
			}
		}

		cond_resched();
	}

out:
	if (err == -ENOMEM)
		err = 0;

	return err;
}

static int igt_ppgtt_compact(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	int err;

	/*
	 * Simple test to catch issues with compact 64K pages -- since the pt is
	 * compacted to 256B, which gives us 32 entries per pt; however, since
	 * the backing page for the pt is 4K, any extra entries we might
	 * incorrectly write out should be ignored by the HW. If we ever hit
	 * such a case, this test should catch it since some of our writes
	 * would land in scratch.
	 */

	if (!HAS_64K_PAGES(i915)) {
		pr_info("device lacks compact 64K page support, skipping\n");
		return 0;
	}

	if (!HAS_LMEM(i915)) {
		pr_info("device lacks LMEM support, skipping\n");
		return 0;
	}

	/* We want the range to cover multiple page-table boundaries. */
	obj = i915_gem_object_create_lmem(i915, SZ_4M, 0);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_put;

	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
		pr_info("LMEM compact unable to allocate huge-page(s)\n");
		goto out_unpin;
	}

	/*
	 * Disable 2M GTT pages by forcing the page-size to 64K for the GTT
	 * insertion.
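	 *
	 * The 4M object above spans at least two page tables (each compact
	 * 64K pt still maps 2M of address space), so the insertion also
	 * crosses a pt boundary as intended.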
1593 */ 1594 obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K; 1595 1596 err = igt_write_huge(i915, obj); 1597 if (err) 1598 pr_err("LMEM compact write-huge failed\n"); 1599 1600 out_unpin: 1601 i915_gem_object_unpin_pages(obj); 1602 out_put: 1603 i915_gem_object_put(obj); 1604 1605 if (err == -ENOMEM) 1606 err = 0; 1607 1608 return err; 1609 } 1610 1611 static int igt_ppgtt_mixed(void *arg) 1612 { 1613 struct drm_i915_private *i915 = arg; 1614 const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; 1615 struct drm_i915_gem_object *obj, *on; 1616 struct i915_gem_engines *engines; 1617 struct i915_gem_engines_iter it; 1618 struct i915_address_space *vm; 1619 struct i915_gem_context *ctx; 1620 struct intel_context *ce; 1621 struct file *file; 1622 I915_RND_STATE(prng); 1623 LIST_HEAD(objects); 1624 struct intel_memory_region *mr; 1625 struct i915_vma *vma; 1626 unsigned int count; 1627 u32 i, addr; 1628 int *order; 1629 int n, err; 1630 1631 /* 1632 * Sanity check mixing 4K and 64K pages within the same page-table via 1633 * the new PS64 TLB hint. 1634 */ 1635 1636 if (!HAS_64K_PAGES(i915)) { 1637 pr_info("device lacks PS64, skipping\n"); 1638 return 0; 1639 } 1640 1641 file = mock_file(i915); 1642 if (IS_ERR(file)) 1643 return PTR_ERR(file); 1644 1645 ctx = hugepage_ctx(i915, file); 1646 if (IS_ERR(ctx)) { 1647 err = PTR_ERR(ctx); 1648 goto out; 1649 } 1650 vm = i915_gem_context_get_eb_vm(ctx); 1651 1652 i = 0; 1653 addr = 0; 1654 do { 1655 u32 sz; 1656 1657 sz = i915_prandom_u32_max_state(SZ_4M, &prng); 1658 sz = max_t(u32, sz, SZ_4K); 1659 1660 mr = i915->mm.regions[INTEL_REGION_LMEM_0]; 1661 if (i & 1) 1662 mr = i915->mm.regions[INTEL_REGION_SMEM]; 1663 1664 obj = i915_gem_object_create_region(mr, sz, 0, 0); 1665 if (IS_ERR(obj)) { 1666 err = PTR_ERR(obj); 1667 goto out_vm; 1668 } 1669 1670 list_add_tail(&obj->st_link, &objects); 1671 1672 vma = i915_vma_instance(obj, vm, NULL); 1673 if (IS_ERR(vma)) { 1674 err = PTR_ERR(vma); 1675 goto err_put; 1676 } 1677 1678 addr = round_up(addr, mr->min_page_size); 1679 err = i915_vma_pin(vma, 0, 0, addr | flags); 1680 if (err) 1681 goto err_put; 1682 1683 if (mr->type == INTEL_MEMORY_LOCAL && 1684 (vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) { 1685 err = -EINVAL; 1686 goto err_put; 1687 } 1688 1689 addr += obj->base.size; 1690 i++; 1691 } while (addr <= SZ_16M); 1692 1693 n = 0; 1694 count = 0; 1695 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { 1696 count++; 1697 if (!intel_engine_can_store_dword(ce->engine)) 1698 continue; 1699 1700 n++; 1701 } 1702 i915_gem_context_unlock_engines(ctx); 1703 if (!n) 1704 goto err_put; 1705 1706 order = i915_random_order(count * count, &prng); 1707 if (!order) { 1708 err = -ENOMEM; 1709 goto err_put; 1710 } 1711 1712 i = 0; 1713 addr = 0; 1714 engines = i915_gem_context_lock_engines(ctx); 1715 list_for_each_entry(obj, &objects, st_link) { 1716 u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng); 1717 1718 addr = round_up(addr, obj->mm.region->min_page_size); 1719 1720 ce = engines->engines[order[i] % engines->num_engines]; 1721 i = (i + 1) % (count * count); 1722 if (!ce || !intel_engine_can_store_dword(ce->engine)) 1723 continue; 1724 1725 err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd); 1726 if (err) 1727 break; 1728 1729 err = __igt_write_huge(ce, obj, obj->base.size, addr, 1730 offset_in_page(rnd) / sizeof(u32), rnd + 1); 1731 if (err) 1732 break; 1733 1734 err = __igt_write_huge(ce, obj, obj->base.size, addr, 1735 (PAGE_SIZE / sizeof(u32)) - 1, 1736 rnd + 2); 1737 
if (err)
			break;

		addr += obj->base.size;

		cond_resched();
	}

	i915_gem_context_unlock_engines(ctx);
	kfree(order);
err_put:
	list_for_each_entry_safe(obj, on, &objects, st_link) {
		list_del(&obj->st_link);
		i915_gem_object_put(obj);
	}
out_vm:
	i915_vm_put(vm);
out:
	fput(file);
	return err;
}

static int igt_tmpfs_fallback(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct vfsmount *gemfs = i915->mm.gemfs;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	struct file *file;
	u32 *vaddr;
	int err = 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = hugepage_ctx(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}
	vm = i915_gem_context_get_eb_vm(ctx);

	/*
	 * Make sure that we don't burst into a ball of flames upon falling back
	 * to tmpfs, which we rely on if, on the off chance, we encounter a
	 * failure when setting up gemfs.
	 */

	i915->mm.gemfs = NULL;

	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_restore;
	}

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_put;
	}
	*vaddr = 0xdeadbeaf;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out_put;

	err = igt_check_page_sizes(vma);

	i915_vma_unpin(vma);
out_put:
	i915_gem_object_put(obj);
out_restore:
	i915->mm.gemfs = gemfs;

	i915_vm_put(vm);
out:
	fput(file);
	return err;
}

static int igt_shrink_thp(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct drm_i915_gem_object *obj;
	struct i915_gem_engines_iter it;
	struct intel_context *ce;
	struct i915_vma *vma;
	struct file *file;
	unsigned int flags = PIN_USER;
	unsigned int n;
	intel_wakeref_t wf;
	bool should_swap;
	int err;

	if (!igt_can_allocate_thp(i915)) {
		pr_info("missing THP support, skipping\n");
		return 0;
	}

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = hugepage_ctx(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}
	vm = i915_gem_context_get_eb_vm(ctx);

	/*
	 * Sanity check shrinking a huge-paged object -- make sure nothing
	 * blows up.
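	 *
	 * The sequence is: write to the object from the GPU, shrink with
	 * writeback so shmem can truncate (and, if swap is available, swap
	 * out) the huge pages, then re-pin and verify the writes from the CPU.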
	 */

	obj = i915_gem_object_create_shmem(i915, SZ_2M);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_vm;
	}

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	wf = intel_runtime_pm_get(&i915->runtime_pm); /* active shrink */

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_wf;

	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
		pr_info("failed to allocate THP, finishing test early\n");
		goto out_unpin;
	}

	err = igt_check_page_sizes(vma);
	if (err)
		goto out_unpin;

	n = 0;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (!intel_engine_can_store_dword(ce->engine))
			continue;

		err = gpu_write(ce, vma, n++, 0xdeadbeaf);
		if (err)
			break;
	}
	i915_gem_context_unlock_engines(ctx);
	/*
	 * Nuke everything *before* we unpin the pages so we can be reasonably
	 * sure that, when we later check get_nr_swap_pages(), some random
	 * leftover object doesn't steal the remaining swap space.
	 */
	i915_gem_shrink(NULL, i915, -1UL, NULL,
			I915_SHRINK_BOUND |
			I915_SHRINK_UNBOUND |
			I915_SHRINK_ACTIVE);
	i915_vma_unpin(vma);
	if (err)
		goto out_wf;

	/*
	 * Now that the pages are *unpinned* shrinking should invoke
	 * shmem to truncate our pages, if we have available swap.
	 */
	should_swap = get_nr_swap_pages() > 0;
	i915_gem_shrink(NULL, i915, -1UL, NULL,
			I915_SHRINK_BOUND |
			I915_SHRINK_UNBOUND |
			I915_SHRINK_ACTIVE |
			I915_SHRINK_WRITEBACK);
	if (should_swap == i915_gem_object_has_pages(obj)) {
		pr_err("unexpected pages mismatch, should_swap=%s\n",
		       str_yes_no(should_swap));
		err = -EINVAL;
		goto out_wf;
	}

	if (should_swap == (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys)) {
		pr_err("unexpected residual page-size bits, should_swap=%s\n",
		       str_yes_no(should_swap));
		err = -EINVAL;
		goto out_wf;
	}

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_wf;

	while (n--) {
		err = cpu_check(obj, n, 0xdeadbeaf);
		if (err)
			break;
	}

out_unpin:
	i915_vma_unpin(vma);
out_wf:
	intel_runtime_pm_put(&i915->runtime_pm, wf);
out_put:
	i915_gem_object_put(obj);
out_vm:
	i915_vm_put(vm);
out:
	fput(file);
	return err;
}

int i915_gem_huge_page_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_mock_exhaust_device_supported_pages),
		SUBTEST(igt_mock_memory_region_huge_pages),
		SUBTEST(igt_mock_ppgtt_misaligned_dma),
	};
	struct drm_i915_private *dev_priv;
	struct i915_ppgtt *ppgtt;
	int err;

	dev_priv = mock_gem_device();
	if (!dev_priv)
		return -ENOMEM;

	/* Pretend to be a device which supports the 48b PPGTT */
	RUNTIME_INFO(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
	RUNTIME_INFO(dev_priv)->ppgtt_size = 48;

	ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_unlock;
	}

	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		pr_err("failed to create 48b PPGTT\n");
		err = -EINVAL;
		goto out_put;
	}

	/* If we ever hit this then it's time to mock the 64K scratch */
	if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
1998 pr_err("PPGTT missing 64K scratch page\n"); 1999 err = -EINVAL; 2000 goto out_put; 2001 } 2002 2003 err = i915_subtests(tests, ppgtt); 2004 2005 out_put: 2006 i915_vm_put(&ppgtt->vm); 2007 out_unlock: 2008 mock_destroy_device(dev_priv); 2009 return err; 2010 } 2011 2012 int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) 2013 { 2014 static const struct i915_subtest tests[] = { 2015 SUBTEST(igt_shrink_thp), 2016 SUBTEST(igt_tmpfs_fallback), 2017 SUBTEST(igt_ppgtt_smoke_huge), 2018 SUBTEST(igt_ppgtt_sanity_check), 2019 SUBTEST(igt_ppgtt_compact), 2020 SUBTEST(igt_ppgtt_mixed), 2021 SUBTEST(igt_ppgtt_huge_fill), 2022 SUBTEST(igt_ppgtt_64K), 2023 }; 2024 2025 if (!HAS_PPGTT(i915)) { 2026 pr_info("PPGTT not supported, skipping live-selftests\n"); 2027 return 0; 2028 } 2029 2030 if (intel_gt_is_wedged(to_gt(i915))) 2031 return 0; 2032 2033 return i915_live_subtests(tests, i915); 2034 } 2035