1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2014-2016 Intel Corporation 5 */ 6 7 #include <linux/pagevec.h> 8 #include <linux/shmem_fs.h> 9 #include <linux/swap.h> 10 11 #include <drm/drm_cache.h> 12 13 #include "gem/i915_gem_region.h" 14 #include "i915_drv.h" 15 #include "i915_gem_object.h" 16 #include "i915_gem_tiling.h" 17 #include "i915_gemfs.h" 18 #include "i915_scatterlist.h" 19 #include "i915_trace.h" 20 21 /* 22 * Move folios to appropriate lru and release the batch, decrementing the 23 * ref count of those folios. 24 */ 25 static void check_release_folio_batch(struct folio_batch *fbatch) 26 { 27 check_move_unevictable_folios(fbatch); 28 __folio_batch_release(fbatch); 29 cond_resched(); 30 } 31 32 void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping, 33 bool dirty, bool backup) 34 { 35 struct sgt_iter sgt_iter; 36 struct folio_batch fbatch; 37 struct folio *last = NULL; 38 struct page *page; 39 40 mapping_clear_unevictable(mapping); 41 42 folio_batch_init(&fbatch); 43 for_each_sgt_page(page, sgt_iter, st) { 44 struct folio *folio = page_folio(page); 45 46 if (folio == last) 47 continue; 48 last = folio; 49 if (dirty) 50 folio_mark_dirty(folio); 51 if (backup) 52 folio_mark_accessed(folio); 53 54 if (!folio_batch_add(&fbatch, folio)) 55 check_release_folio_batch(&fbatch); 56 } 57 if (fbatch.nr) 58 check_release_folio_batch(&fbatch); 59 60 sg_free_table(st); 61 } 62 63 int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, 64 size_t size, struct intel_memory_region *mr, 65 struct address_space *mapping, 66 unsigned int max_segment) 67 { 68 unsigned int page_count; /* restricted by sg_alloc_table */ 69 unsigned long i; 70 struct scatterlist *sg; 71 unsigned long next_pfn = 0; /* suppress gcc warning */ 72 gfp_t noreclaim; 73 int ret; 74 75 if (overflows_type(size / PAGE_SIZE, page_count)) 76 return -E2BIG; 77 78 page_count = size / PAGE_SIZE; 79 /* 80 * If there's no chance of allocating enough pages for the whole 81 * object, bail early. 82 */ 83 if (size > resource_size(&mr->region)) 84 return -ENOMEM; 85 86 if (sg_alloc_table(st, page_count, GFP_KERNEL | __GFP_NOWARN)) 87 return -ENOMEM; 88 89 /* 90 * Get the list of pages out of our struct file. They'll be pinned 91 * at this point until we release them. 92 * 93 * Fail silently without starting the shrinker 94 */ 95 mapping_set_unevictable(mapping); 96 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 97 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 98 99 sg = st->sgl; 100 st->nents = 0; 101 for (i = 0; i < page_count; i++) { 102 struct folio *folio; 103 unsigned long nr_pages; 104 const unsigned int shrink[] = { 105 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND, 106 0, 107 }, *s = shrink; 108 gfp_t gfp = noreclaim; 109 110 do { 111 cond_resched(); 112 folio = shmem_read_folio_gfp(mapping, i, gfp); 113 if (!IS_ERR(folio)) 114 break; 115 116 if (!*s) { 117 ret = PTR_ERR(folio); 118 goto err_sg; 119 } 120 121 i915_gem_shrink(NULL, i915, 2 * page_count, NULL, *s++); 122 123 /* 124 * We've tried hard to allocate the memory by reaping 125 * our own buffer, now let the real VM do its job and 126 * go down in flames if truly OOM. 127 * 128 * However, since graphics tend to be disposable, 129 * defer the oom here by reporting the ENOMEM back 130 * to userspace. 131 */ 132 if (!*s) { 133 /* reclaim and warn, but no oom */ 134 gfp = mapping_gfp_mask(mapping); 135 136 /* 137 * Our bo are always dirty and so we require 138 * kswapd to reclaim our pages (direct reclaim 139 * does not effectively begin pageout of our 140 * buffers on its own). However, direct reclaim 141 * only waits for kswapd when under allocation 142 * congestion. So as a result __GFP_RECLAIM is 143 * unreliable and fails to actually reclaim our 144 * dirty pages -- unless you try over and over 145 * again with !__GFP_NORETRY. However, we still 146 * want to fail this allocation rather than 147 * trigger the out-of-memory killer and for 148 * this we want __GFP_RETRY_MAYFAIL. 149 */ 150 gfp |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN; 151 } 152 } while (1); 153 154 nr_pages = min_t(unsigned long, 155 folio_nr_pages(folio), page_count - i); 156 if (!i || 157 sg->length >= max_segment || 158 folio_pfn(folio) != next_pfn) { 159 if (i) 160 sg = sg_next(sg); 161 162 st->nents++; 163 sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0); 164 } else { 165 /* XXX: could overflow? */ 166 sg->length += nr_pages * PAGE_SIZE; 167 } 168 next_pfn = folio_pfn(folio) + nr_pages; 169 i += nr_pages - 1; 170 171 /* Check that the i965g/gm workaround works. */ 172 GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL); 173 } 174 if (sg) /* loop terminated early; short sg table */ 175 sg_mark_end(sg); 176 177 /* Trim unused sg entries to avoid wasting memory. */ 178 i915_sg_trim(st); 179 180 return 0; 181 err_sg: 182 sg_mark_end(sg); 183 if (sg != st->sgl) { 184 shmem_sg_free_table(st, mapping, false, false); 185 } else { 186 mapping_clear_unevictable(mapping); 187 sg_free_table(st); 188 } 189 190 /* 191 * shmemfs first checks if there is enough memory to allocate the page 192 * and reports ENOSPC should there be insufficient, along with the usual 193 * ENOMEM for a genuine allocation failure. 194 * 195 * We use ENOSPC in our driver to mean that we have run out of aperture 196 * space and so want to translate the error from shmemfs back to our 197 * usual understanding of ENOMEM. 198 */ 199 if (ret == -ENOSPC) 200 ret = -ENOMEM; 201 202 return ret; 203 } 204 205 static int shmem_get_pages(struct drm_i915_gem_object *obj) 206 { 207 struct drm_i915_private *i915 = to_i915(obj->base.dev); 208 struct intel_memory_region *mem = obj->mm.region; 209 struct address_space *mapping = obj->base.filp->f_mapping; 210 unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); 211 struct sg_table *st; 212 int ret; 213 214 /* 215 * Assert that the object is not currently in any GPU domain. As it 216 * wasn't in the GTT, there shouldn't be any way it could have been in 217 * a GPU cache 218 */ 219 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 220 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 221 222 rebuild_st: 223 st = kmalloc(sizeof(*st), GFP_KERNEL | __GFP_NOWARN); 224 if (!st) 225 return -ENOMEM; 226 227 ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, mapping, 228 max_segment); 229 if (ret) 230 goto err_st; 231 232 ret = i915_gem_gtt_prepare_pages(obj, st); 233 if (ret) { 234 /* 235 * DMA remapping failed? One possible cause is that 236 * it could not reserve enough large entries, asking 237 * for PAGE_SIZE chunks instead may be helpful. 238 */ 239 if (max_segment > PAGE_SIZE) { 240 shmem_sg_free_table(st, mapping, false, false); 241 kfree(st); 242 243 max_segment = PAGE_SIZE; 244 goto rebuild_st; 245 } else { 246 dev_warn(i915->drm.dev, 247 "Failed to DMA remap %zu pages\n", 248 obj->base.size >> PAGE_SHIFT); 249 goto err_pages; 250 } 251 } 252 253 if (i915_gem_object_needs_bit17_swizzle(obj)) 254 i915_gem_object_do_bit_17_swizzle(obj, st); 255 256 if (i915_gem_object_can_bypass_llc(obj)) 257 obj->cache_dirty = true; 258 259 __i915_gem_object_set_pages(obj, st); 260 261 return 0; 262 263 err_pages: 264 shmem_sg_free_table(st, mapping, false, false); 265 /* 266 * shmemfs first checks if there is enough memory to allocate the page 267 * and reports ENOSPC should there be insufficient, along with the usual 268 * ENOMEM for a genuine allocation failure. 269 * 270 * We use ENOSPC in our driver to mean that we have run out of aperture 271 * space and so want to translate the error from shmemfs back to our 272 * usual understanding of ENOMEM. 273 */ 274 err_st: 275 if (ret == -ENOSPC) 276 ret = -ENOMEM; 277 278 kfree(st); 279 280 return ret; 281 } 282 283 static int 284 shmem_truncate(struct drm_i915_gem_object *obj) 285 { 286 /* 287 * Our goal here is to return as much of the memory as 288 * is possible back to the system as we are called from OOM. 289 * To do this we must instruct the shmfs to drop all of its 290 * backing pages, *now*. 291 */ 292 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 293 obj->mm.madv = __I915_MADV_PURGED; 294 obj->mm.pages = ERR_PTR(-EFAULT); 295 296 return 0; 297 } 298 299 void __shmem_writeback(size_t size, struct address_space *mapping) 300 { 301 struct writeback_control wbc = { 302 .sync_mode = WB_SYNC_NONE, 303 .nr_to_write = SWAP_CLUSTER_MAX, 304 .range_start = 0, 305 .range_end = LLONG_MAX, 306 .for_reclaim = 1, 307 }; 308 struct folio *folio = NULL; 309 int error = 0; 310 311 /* 312 * Leave mmapings intact (GTT will have been revoked on unbinding, 313 * leaving only CPU mmapings around) and add those folios to the LRU 314 * instead of invoking writeback so they are aged and paged out 315 * as normal. 316 */ 317 while ((folio = writeback_iter(mapping, &wbc, folio, &error))) { 318 if (folio_mapped(folio)) 319 folio_redirty_for_writepage(&wbc, folio); 320 else 321 error = shmem_writeout(folio, &wbc); 322 } 323 } 324 325 static void 326 shmem_writeback(struct drm_i915_gem_object *obj) 327 { 328 __shmem_writeback(obj->base.size, obj->base.filp->f_mapping); 329 } 330 331 static int shmem_shrink(struct drm_i915_gem_object *obj, unsigned int flags) 332 { 333 switch (obj->mm.madv) { 334 case I915_MADV_DONTNEED: 335 return i915_gem_object_truncate(obj); 336 case __I915_MADV_PURGED: 337 return 0; 338 } 339 340 if (flags & I915_GEM_OBJECT_SHRINK_WRITEBACK) 341 shmem_writeback(obj); 342 343 return 0; 344 } 345 346 void 347 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 348 struct sg_table *pages, 349 bool needs_clflush) 350 { 351 struct drm_i915_private *i915 = to_i915(obj->base.dev); 352 353 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 354 355 if (obj->mm.madv == I915_MADV_DONTNEED) 356 obj->mm.dirty = false; 357 358 if (needs_clflush && 359 (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && 360 !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 361 drm_clflush_sg(pages); 362 363 __start_cpu_write(obj); 364 /* 365 * On non-LLC igfx platforms, force the flush-on-acquire if this is ever 366 * swapped-in. Our async flush path is not trust worthy enough yet(and 367 * happens in the wrong order), and with some tricks it's conceivable 368 * for userspace to change the cache-level to I915_CACHE_NONE after the 369 * pages are swapped-in, and since execbuf binds the object before doing 370 * the async flush, we have a race window. 371 */ 372 if (!HAS_LLC(i915) && !IS_DGFX(i915)) 373 obj->cache_dirty = true; 374 } 375 376 void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages) 377 { 378 __i915_gem_object_release_shmem(obj, pages, true); 379 380 i915_gem_gtt_finish_pages(obj, pages); 381 382 if (i915_gem_object_needs_bit17_swizzle(obj)) 383 i915_gem_object_save_bit_17_swizzle(obj, pages); 384 385 shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping, 386 obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED); 387 kfree(pages); 388 obj->mm.dirty = false; 389 } 390 391 static void 392 shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) 393 { 394 if (likely(i915_gem_object_has_struct_page(obj))) 395 i915_gem_object_put_pages_shmem(obj, pages); 396 else 397 i915_gem_object_put_pages_phys(obj, pages); 398 } 399 400 static int 401 shmem_pwrite(struct drm_i915_gem_object *obj, 402 const struct drm_i915_gem_pwrite *arg) 403 { 404 struct address_space *mapping = obj->base.filp->f_mapping; 405 const struct address_space_operations *aops = mapping->a_ops; 406 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 407 u64 remain; 408 loff_t pos; 409 unsigned int pg; 410 411 /* Caller already validated user args */ 412 GEM_BUG_ON(!access_ok(user_data, arg->size)); 413 414 if (!i915_gem_object_has_struct_page(obj)) 415 return i915_gem_object_pwrite_phys(obj, arg); 416 417 /* 418 * Before we instantiate/pin the backing store for our use, we 419 * can prepopulate the shmemfs filp efficiently using a write into 420 * the pagecache. We avoid the penalty of instantiating all the 421 * pages, important if the user is just writing to a few and never 422 * uses the object on the GPU, and using a direct write into shmemfs 423 * allows it to avoid the cost of retrieving a page (either swapin 424 * or clearing-before-use) before it is overwritten. 425 */ 426 if (i915_gem_object_has_pages(obj)) 427 return -ENODEV; 428 429 if (obj->mm.madv != I915_MADV_WILLNEED) 430 return -EFAULT; 431 432 /* 433 * Before the pages are instantiated the object is treated as being 434 * in the CPU domain. The pages will be clflushed as required before 435 * use, and we can freely write into the pages directly. If userspace 436 * races pwrite with any other operation; corruption will ensue - 437 * that is userspace's prerogative! 438 */ 439 440 remain = arg->size; 441 pos = arg->offset; 442 pg = offset_in_page(pos); 443 444 do { 445 unsigned int len, unwritten; 446 struct folio *folio; 447 void *data, *vaddr; 448 int err; 449 char __maybe_unused c; 450 451 len = PAGE_SIZE - pg; 452 if (len > remain) 453 len = remain; 454 455 /* Prefault the user page to reduce potential recursion */ 456 err = __get_user(c, user_data); 457 if (err) 458 return err; 459 460 err = __get_user(c, user_data + len - 1); 461 if (err) 462 return err; 463 464 err = aops->write_begin(obj->base.filp, mapping, pos, len, 465 &folio, &data); 466 if (err < 0) 467 return err; 468 469 vaddr = kmap_local_folio(folio, offset_in_folio(folio, pos)); 470 pagefault_disable(); 471 unwritten = __copy_from_user_inatomic(vaddr, user_data, len); 472 pagefault_enable(); 473 kunmap_local(vaddr); 474 475 err = aops->write_end(obj->base.filp, mapping, pos, len, 476 len - unwritten, folio, data); 477 if (err < 0) 478 return err; 479 480 /* We don't handle -EFAULT, leave it to the caller to check */ 481 if (unwritten) 482 return -ENODEV; 483 484 remain -= len; 485 user_data += len; 486 pos += len; 487 pg = 0; 488 } while (remain); 489 490 return 0; 491 } 492 493 static int 494 shmem_pread(struct drm_i915_gem_object *obj, 495 const struct drm_i915_gem_pread *arg) 496 { 497 if (!i915_gem_object_has_struct_page(obj)) 498 return i915_gem_object_pread_phys(obj, arg); 499 500 return -ENODEV; 501 } 502 503 static void shmem_release(struct drm_i915_gem_object *obj) 504 { 505 if (i915_gem_object_has_struct_page(obj)) 506 i915_gem_object_release_memory_region(obj); 507 508 fput(obj->base.filp); 509 } 510 511 const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { 512 .name = "i915_gem_object_shmem", 513 .flags = I915_GEM_OBJECT_IS_SHRINKABLE, 514 515 .get_pages = shmem_get_pages, 516 .put_pages = shmem_put_pages, 517 .truncate = shmem_truncate, 518 .shrink = shmem_shrink, 519 520 .pwrite = shmem_pwrite, 521 .pread = shmem_pread, 522 523 .release = shmem_release, 524 }; 525 526 static int __create_shmem(struct drm_i915_private *i915, 527 struct drm_gem_object *obj, 528 resource_size_t size) 529 { 530 unsigned long flags = VM_NORESERVE; 531 struct file *filp; 532 533 drm_gem_private_object_init(&i915->drm, obj, size); 534 535 /* XXX: The __shmem_file_setup() function returns -EINVAL if size is 536 * greater than MAX_LFS_FILESIZE. 537 * To handle the same error as other code that returns -E2BIG when 538 * the size is too large, we add a code that returns -E2BIG when the 539 * size is larger than the size that can be handled. 540 * If BITS_PER_LONG is 32, size > MAX_LFS_FILESIZE is always false, 541 * so we only needs to check when BITS_PER_LONG is 64. 542 * If BITS_PER_LONG is 32, E2BIG checks are processed when 543 * i915_gem_object_size_2big() is called before init_object() callback 544 * is called. 545 */ 546 if (BITS_PER_LONG == 64 && size > MAX_LFS_FILESIZE) 547 return -E2BIG; 548 549 if (i915->mm.gemfs) 550 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 551 flags); 552 else 553 filp = shmem_file_setup("i915", size, flags); 554 if (IS_ERR(filp)) 555 return PTR_ERR(filp); 556 557 obj->filp = filp; 558 return 0; 559 } 560 561 static int shmem_object_init(struct intel_memory_region *mem, 562 struct drm_i915_gem_object *obj, 563 resource_size_t offset, 564 resource_size_t size, 565 resource_size_t page_size, 566 unsigned int flags) 567 { 568 static struct lock_class_key lock_class; 569 struct drm_i915_private *i915 = mem->i915; 570 struct address_space *mapping; 571 unsigned int cache_level; 572 gfp_t mask; 573 int ret; 574 575 ret = __create_shmem(i915, &obj->base, size); 576 if (ret) 577 return ret; 578 579 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 580 if (IS_I965GM(i915) || IS_I965G(i915)) { 581 /* 965gm cannot relocate objects above 4GiB. */ 582 mask &= ~__GFP_HIGHMEM; 583 mask |= __GFP_DMA32; 584 } 585 586 mapping = obj->base.filp->f_mapping; 587 mapping_set_gfp_mask(mapping, mask); 588 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 589 590 i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags); 591 obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; 592 obj->write_domain = I915_GEM_DOMAIN_CPU; 593 obj->read_domains = I915_GEM_DOMAIN_CPU; 594 595 /* 596 * MTL doesn't snoop CPU cache by default for GPU access (namely 597 * 1-way coherency). However some UMD's are currently depending on 598 * that. Make 1-way coherent the default setting for MTL. A follow 599 * up patch will extend the GEM_CREATE uAPI to allow UMD's specify 600 * caching mode at BO creation time 601 */ 602 if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) 603 /* On some devices, we can have the GPU use the LLC (the CPU 604 * cache) for about a 10% performance improvement 605 * compared to uncached. Graphics requests other than 606 * display scanout are coherent with the CPU in 607 * accessing this cache. This means in this mode we 608 * don't need to clflush on the CPU side, and on the 609 * GPU side we only need to flush internal caches to 610 * get data visible to the CPU. 611 * 612 * However, we maintain the display planes as UC, and so 613 * need to rebind when first used as such. 614 */ 615 cache_level = I915_CACHE_LLC; 616 else 617 cache_level = I915_CACHE_NONE; 618 619 i915_gem_object_set_cache_coherency(obj, cache_level); 620 621 i915_gem_object_init_memory_region(obj, mem); 622 623 return 0; 624 } 625 626 struct drm_i915_gem_object * 627 i915_gem_object_create_shmem(struct drm_i915_private *i915, 628 resource_size_t size) 629 { 630 return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], 631 size, 0, 0); 632 } 633 634 /* Allocate a new GEM object and fill it with the supplied data */ 635 struct drm_i915_gem_object * 636 i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, 637 const void *data, resource_size_t size) 638 { 639 struct drm_i915_gem_object *obj; 640 struct file *file; 641 const struct address_space_operations *aops; 642 loff_t pos; 643 int err; 644 645 GEM_WARN_ON(IS_DGFX(i915)); 646 obj = i915_gem_object_create_shmem(i915, round_up(size, PAGE_SIZE)); 647 if (IS_ERR(obj)) 648 return obj; 649 650 GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); 651 652 file = obj->base.filp; 653 aops = file->f_mapping->a_ops; 654 pos = 0; 655 do { 656 unsigned int len = min_t(typeof(size), size, PAGE_SIZE); 657 struct folio *folio; 658 void *fsdata; 659 660 err = aops->write_begin(file, file->f_mapping, pos, len, 661 &folio, &fsdata); 662 if (err < 0) 663 goto fail; 664 665 memcpy_to_folio(folio, offset_in_folio(folio, pos), data, len); 666 667 err = aops->write_end(file, file->f_mapping, pos, len, len, 668 folio, fsdata); 669 if (err < 0) 670 goto fail; 671 672 size -= len; 673 data += len; 674 pos += len; 675 } while (size); 676 677 return obj; 678 679 fail: 680 i915_gem_object_put(obj); 681 return ERR_PTR(err); 682 } 683 684 static int init_shmem(struct intel_memory_region *mem) 685 { 686 i915_gemfs_init(mem->i915); 687 intel_memory_region_set_name(mem, "system"); 688 689 return 0; /* We have fallback to the kernel mnt if gemfs init failed. */ 690 } 691 692 static int release_shmem(struct intel_memory_region *mem) 693 { 694 i915_gemfs_fini(mem->i915); 695 return 0; 696 } 697 698 static const struct intel_memory_region_ops shmem_region_ops = { 699 .init = init_shmem, 700 .release = release_shmem, 701 .init_object = shmem_object_init, 702 }; 703 704 struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915, 705 u16 type, u16 instance) 706 { 707 return intel_memory_region_create(i915, 0, 708 totalram_pages() << PAGE_SHIFT, 709 PAGE_SIZE, 0, 0, 710 type, instance, 711 &shmem_region_ops); 712 } 713 714 bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj) 715 { 716 return obj->ops == &i915_gem_shmem_ops; 717 } 718